ReactOS 0.4.15-dev-6056-gb29b268
asmpp.cpp
Go to the documentation of this file.
1/*
2 * PROJECT: ReactOS host tools
3 * LICENSE: MIT (https://spdx.org/licenses/MIT)
4 * PURPOSE: ASM preprocessor
5 * COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org>
6 */
7
8// Optimize even on debug builds, because otherwise it's ridiculously slow
9#ifdef _MSC_VER
10#pragma optimize("gst", on)
11#pragma auto_inline(on)
12#else
13#pragma GCC optimize("O3,inline")
14#endif
15
16#include "tokenizer.hpp"
17#include <cstdlib>
18#include <cstdio>
19#include <sstream>
20#include <ctime>
21
22#define PROFILING_ENABLED 0
23
24using namespace std;
25
27
29{
30 Invalid = -1,
38
46
61
65
77
80
93
96};
97
/**
 * No-op replacement for printf.
 *
 * Accepts the same arguments as printf but produces no output. It exists so
 * that the (commented out) "#define printf fake_printf" below it can silence
 * all translation output.
 *
 * @param format  printf-style format string (ignored, as are all varargs).
 * @return Always 0.
 */
int fake_printf(const char* format, ...)
{
    (void)format; // deliberately unused
    const int chars_written = 0;
    return chars_written;
}
102
103//#define printf fake_printf
104
105// Use a look-ahead for following characters, not included into the match
106//#define FOLLOWED_BY(x) R"((?=)" x R"())"
107#define FOLLOWED_BY(x) x
108
109#define ANY_CHAR R"((?:.|\n))"
110#define WHITESPACE R"((?:[ \t]++))"
111#define NEWLINE R"([\n])"
112#define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)"
113#define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])"
114
115#define INSTRUCTION \
116 "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \
117 "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \
118 "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \
119 "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \
120 "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \
121 "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \
122 "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \
123 "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \
124 "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \
125 "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \
126 "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \
127 "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \
128 "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \
129 "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \
130 "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \
131 "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \
132 "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \
133 "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \
134 "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \
135 "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \
136 "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \
137 "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \
138 "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \
139 "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \
140 "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \
141 "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \
142 "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \
143 "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \
144 "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \
145 "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \
146 "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \
147 "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \
148 "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \
149 "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \
150 "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \
151 "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \
152 "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \
153 "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \
154 "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \
155 "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \
156 "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \
157 "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \
158 "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \
159 "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \
160 "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \
161 "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \
162 "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \
163 "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \
164 "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \
165 "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \
166 "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \
167 "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \
168 "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \
169 "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \
170 "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \
171 "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \
172 "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \
173 "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \
174 "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \
175 "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \
176 "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \
177 "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \
178 "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \
179 "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \
180 "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \
181 "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \
182 "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \
183 "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \
184 "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \
185 "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \
186 "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \
187 "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \
188 "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \
189 "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \
190 "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \
191 "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \
192 "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \
193 "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \
194 "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \
195 "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \
196 "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \
197 "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \
198 "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \
199 "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \
200 "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \
201 "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \
202 "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \
203 "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \
204 "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \
205 "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \
206 "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \
207 "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \
208 "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \
209 "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \
210 "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \
211 "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \
212 "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \
213 "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \
214 "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \
215 "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \
216 "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \
217 "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \
218 "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \
219 "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \
220 "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \
221 "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \
222 "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \
223 "XSAVES|XSETBV|XTEST"
224
226{
227 //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" },
228 { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" },
229 { TOKEN_TYPE::NewLine, R"((\n))" },
230 { TOKEN_TYPE::Comment, R"((;.*\n))" },
231 { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\‍(\)])") },
232 { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\‍(\)])") },
233 { TOKEN_TYPE::String, R"((\".*\"))" },
234
235 { TOKEN_TYPE::BraceOpen, R"((\‍())"},
236 { TOKEN_TYPE::BraceClose, R"((\)))"},
237 { TOKEN_TYPE::MemRefStart, R"((\[))"},
238 { TOKEN_TYPE::MemRefEnd, R"((\]))"},
239 { TOKEN_TYPE::Colon, R"((\:))"},
240 { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"},
241 { TOKEN_TYPE::StringDef, R"((<.+>))" },
242
243 { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") },
244 { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") },
245 { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") },
246 { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") },
247 { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") },
248 { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") },
249 { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") },
250 { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") },
251 { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") },
252 { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") },
253 { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") },
254 { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")},
255 { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")},
256 { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")},
257 { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")},
258
259 { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") },
260 { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") },
261 { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") },
262
263 { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") },
264 { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") },
265 { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") },
266 { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") },
267 { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") },
268 { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") },
269 { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") },
270 { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") },
271 { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") },
272 { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") },
273 { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") },
274 { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")},
275 { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")},
276
277 { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") },
278 { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") },
279 { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") },
280 { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") },
281 { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") },
282 { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") },
283 { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") },
284 { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
285 { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
286 { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
287 { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
288 { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
289
290 { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)},
291 { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
292 { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
293
294};
295
296// FIXME: use context?
297unsigned int g_label_number = 0;
298
300
301void
303{
304 g_identifiers.push_back(tok.str());
305 //fprintf(stderr, "Added mem id: '%s'\n", tok.str().c_str());
306}
307
308bool
310{
311 for (auto id : g_identifiers)
312 {
313 if (id == tok.str())
314 {
315 return true;
316 }
317 }
318
319 return false;
320}
321
/**
 * Case-insensitive string equality (per-character tolower comparison).
 *
 * @param a  First string.
 * @param b  Second string.
 * @return true if both strings have the same length and all characters are
 *         equal after lower-casing; false otherwise.
 */
bool
iequals(const string &a, const string &b)
{
    const size_t sz = a.size();
    if (b.size() != sz)
        return false;

    for (size_t i = 0; i < sz; ++i)
    {
        // tolower() is only defined for values representable as unsigned char
        // (or EOF); a plain char may be negative, so convert first.
        const int lhs = tolower(static_cast<unsigned char>(a[i]));
        const int rhs = tolower(static_cast<unsigned char>(b[i]));
        if (lhs != rhs)
            return false;
    }

    return true;
}
333
334Token
336{
337 if (tok.type() != type)
338 {
339 throw "Not white space after identifier!\n";
340 }
341
342 return tok;
343}
344
// Validates that `tok` is a WhiteSpace token: returns it unchanged when it
// is, otherwise throws a C-string error message.
// NOTE(review): the declaration line for this body is not visible here; other
// functions in this file call a helper as get_ws(tokens[i]) for exactly this
// check - confirm the name and signature against the original source.
346{
347 int type = tok.type();
348 if (type != TOKEN_TYPE::WhiteSpace)
349 {
350 throw "Not white space after identifier!\n";
351 }
352
353 return tok;
354}
355
357{
358 int type = tok.type();
359 if ((type != TOKEN_TYPE::WhiteSpace) &&
360 (type != TOKEN_TYPE::NewLine))
361 {
362 throw "Not white space after identifier!\n";
363 }
364
365 return tok;
366}
367
/**
 * Checks whether a list of strings contains an exact (case-sensitive) match.
 *
 * Both arguments are taken by const reference: this is called for every
 * identifier token, and passing by value would copy the whole vector and the
 * string on each call.
 *
 * @param list  Strings to search.
 * @param str   String to look for.
 * @return true if any element of list equals str.
 */
bool
is_string_in_list(const vector<string> &list, const string &str)
{
    for (const string &s : list)
    {
        if (s == str)
        {
            return true;
        }
    }

    return false;
}
380
381size_t
382translate_token(TokenList& tokens, size_t index, const vector<string> &macro_params)
383{
384 Token tok = tokens[index];
385 switch (tok.type())
386 {
387 case TOKEN_TYPE::Comment:
388 printf("//%s", tok.str().c_str() + 1);
389 break;
390
391 case TOKEN_TYPE::DecNumber:
392 {
393 unsigned long long num = stoull(tok.str(), nullptr, 10);
394 printf("%llu", num);
395 break;
396 }
397
398 case TOKEN_TYPE::HexNumber:
399 {
400 string number = tok.str();
401 printf("0x%s", number.substr(0, number.size() - 1).c_str());
402 break;
403 }
404
405 case TOKEN_TYPE::Identifier:
406 if (is_string_in_list(macro_params, tok.str()))
407 {
408 printf("\\");
409 }
410 printf("%s", tok.str().c_str());
411 break;
412
413 // We migt want to improve these
414 case TOKEN_TYPE::BYTE_PTR:
415 case TOKEN_TYPE::WORD_PTR:
416 case TOKEN_TYPE::DWORD_PTR:
417 case TOKEN_TYPE::QWORD_PTR:
418 case TOKEN_TYPE::XMMWORD_PTR:
419
420 // Check these. valid only in instructions?
421 case TOKEN_TYPE::Reg8:
422 case TOKEN_TYPE::Reg16:
423 case TOKEN_TYPE::Reg32:
424 case TOKEN_TYPE::Reg64:
425 case TOKEN_TYPE::RegXmm:
426 case TOKEN_TYPE::Instruction:
427
428 case TOKEN_TYPE::WhiteSpace:
429 case TOKEN_TYPE::NewLine:
430 case TOKEN_TYPE::Operator:
431 printf("%s", tok.str().c_str());
432 break;
433
434 default:
435 printf("%s", tok.str().c_str());
436 break;
437 }
438
439 return index + 1;
440}
441
442size_t complete_line(TokenList &tokens, size_t index, const vector<string> &macro_params)
443{
444 while (index < tokens.size())
445 {
446 Token tok = tokens[index];
447 index = translate_token(tokens, index, macro_params);
448 if ((tok.type() == TOKEN_TYPE::NewLine) ||
449 (tok.type() == TOKEN_TYPE::Comment))
450 {
451 break;
452 }
453 }
454
455 return index;
456}
457
458size_t
459translate_expression(TokenList &tokens, size_t index, const vector<string> &macro_params)
460{
461 while (index < tokens.size())
462 {
463 Token tok = tokens[index];
464 switch (tok.type())
465 {
466 case TOKEN_TYPE::NewLine:
467 case TOKEN_TYPE::Comment:
468 return index;
469
470 case TOKEN_TYPE::KW_MASK:
471 printf("MASK_");
472 index += 2;
473 break;
474
475 case TOKEN_TYPE::Instruction:
476 if (iequals(tok.str(), "and"))
477 {
478 printf("&");
479 index += 1;
480 }
481 else if (iequals(tok.str(), "or"))
482 {
483 printf("|");
484 index += 1;
485 }
486 else if (iequals(tok.str(), "shl"))
487 {
488 printf("<<");
489 index += 1;
490 }
491 else if (iequals(tok.str(), "not"))
492 {
493 printf("!");
494 index += 1;
495 }
496 else
497 {
498 throw "Invalid expression";
499 }
500 break;
501
502 case TOKEN_TYPE::Operator:
503 if (tok.str() == ",")
504 {
505 return index;
506 }
507 case TOKEN_TYPE::WhiteSpace:
508 case TOKEN_TYPE::BraceOpen:
509 case TOKEN_TYPE::BraceClose:
510 case TOKEN_TYPE::DecNumber:
511 case TOKEN_TYPE::HexNumber:
512 case TOKEN_TYPE::Identifier:
513 index = translate_token(tokens, index, macro_params);
514 break;
515
516 default:
517 index = translate_token(tokens, index, macro_params);
518 }
519 }
520
521 return index;
522}
523
524size_t translate_mem_ref(TokenList& tokens, size_t index, const vector<string>& macro_params)
525{
526 unsigned int offset = 0;
527
528 Token tok = tokens[index];
529
530 if ((tok.type() == TOKEN_TYPE::DecNumber) ||
531 (tok.type() == TOKEN_TYPE::HexNumber))
532 {
533 offset = stoi(tok.str(), nullptr, 0);
534 index += 2;
535 }
536
537 index = translate_token(tokens, index, macro_params);
538
539 while (index < tokens.size())
540 {
541 Token tok = tokens[index];
542 index = translate_token(tokens, index, macro_params);
543 if (tok.type() == TOKEN_TYPE::MemRefEnd)
544 {
545 if (offset != 0)
546 {
547 printf(" + %u", offset);
548 }
549 return index;
550 }
551 }
552
553 throw "Failed to translate memory ref";
554 return index;
555}
556
557size_t translate_instruction_param(TokenList& tokens, size_t index, const vector<string>& macro_params)
558{
559 switch (tokens[index].type())
560 {
561 case TOKEN_TYPE::BYTE_PTR:
562 case TOKEN_TYPE::WORD_PTR:
563 case TOKEN_TYPE::DWORD_PTR:
564 case TOKEN_TYPE::QWORD_PTR:
565 case TOKEN_TYPE::XMMWORD_PTR:
566 index = translate_token(tokens, index, macro_params);
567
568 // Optional whitespace
569 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
570 {
571 index = translate_token(tokens, index, macro_params);
572 }
573 }
574
575 while (index < tokens.size())
576 {
577 Token tok = tokens[index];
578 switch (tok.type())
579 {
580 case TOKEN_TYPE::MemRefStart:
581 return translate_mem_ref(tokens, index, macro_params);
582
583 case TOKEN_TYPE::NewLine:
584 case TOKEN_TYPE::Comment:
585 return index;
586
587 case TOKEN_TYPE::Operator:
588 if (tok.str() == ",")
589 return index;
590
591 case TOKEN_TYPE::Identifier:
592 index = translate_token(tokens, index, macro_params);
593 if (is_mem_id(tok))
594 {
595 printf("[rip]");
596 }
597 break;
598
599 default:
600 index = translate_expression(tokens, index, macro_params);
601 }
602 }
603
604 return index;
605}
606
607size_t translate_instruction(TokenList& tokens, size_t index, const vector<string>& macro_params)
608{
609 // Translate the instruction itself
610 index = translate_token(tokens, index, macro_params);
611
612 // Handle instruction parameters
613 while (index < tokens.size())
614 {
615 // Optional whitespace
616 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
617 {
618 index = translate_token(tokens, index, macro_params);
619 }
620
621 // Check for parameters
622 Token tok = tokens[index];
623 switch (tok.type())
624 {
625 case TOKEN_TYPE::Comment:
626 case TOKEN_TYPE::NewLine:
627 return index;
628
629 case TOKEN_TYPE::WhiteSpace:
630 case TOKEN_TYPE::Operator:
631 index = translate_token(tokens, index, macro_params);
632 break;
633
634 default:
635 index = translate_instruction_param(tokens, index, macro_params);
636 break;
637 }
638 }
639
640 return index;
641}
642
643size_t translate_item(TokenList& tokens, size_t index, const vector<string> &macro_params)
644{
645 switch (tokens[index].type())
646 {
647 case TOKEN_TYPE::DecNumber:
648 case TOKEN_TYPE::HexNumber:
649 case TOKEN_TYPE::String:
650 case TOKEN_TYPE::WhiteSpace:
651 return translate_token(tokens, index, macro_params);
652 }
653
654 throw "Failed to translate item";
655 return -1;
656}
657
658size_t translate_list(TokenList& tokens, size_t index, const vector<string> &macro_params)
659{
660 while (index < tokens.size())
661 {
662 // The item itself
663 index = translate_item(tokens, index, macro_params);
664
665 // Optional white space
666 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
667 {
668 index = translate_token(tokens, index, macro_params);
669 }
670
671 // End of list?
672 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
673 (tokens[index].type() == TOKEN_TYPE::NewLine))
674 {
675 return index;
676 }
677
678 // We expect a comma here
679 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
680 (tokens[index].str() != ","))
681 {
682 throw "Unexpected end of list";
683 }
684
685 index = translate_token(tokens, index, macro_params);
686 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
687 {
688 index = translate_token(tokens, index, macro_params);
689 }
690 }
691
692 throw "Failed to translate list";
693 return -1;
694}
695
696size_t
697translate_data_def(TokenList& tokens, size_t index, const vector<string>& macro_params)
698{
699 Token tok = tokens[index];
700 Token tok1 = get_ws(tokens[index + 1]);
701 string directive, need, have ="";
702
703 switch (tok.type())
704 {
705 case TOKEN_TYPE::KW_DB:
706 directive = ".byte";
707 break;
708
709 case TOKEN_TYPE::KW_DW:
710 directive = ".short";
711 break;
712
713 case TOKEN_TYPE::KW_DD:
714 directive = ".long";
715 break;
716
717 case TOKEN_TYPE::KW_DQ:
718 directive = ".quad";
719 break;
720 }
721
722 index += 2;
723
724 while (index < tokens.size())
725 {
726 // Check if we need '.ascii' for ASCII strings
727 if (tokens[index].str()[0] == '\"')
728 {
729 need = ".ascii";
730 }
731 else
732 {
733 need = directive;
734 }
735
736 // Output the directive we need (or a comma)
737 if (have == "")
738 {
739 printf("%s ", need.c_str());
740 }
741 else if (have != need)
742 {
743 printf("\n%s ", need.c_str());
744 }
745 else
746 {
747 printf(", ");
748 }
749
750 have = need;
751
752 // The item itself
753 index = translate_item(tokens, index, macro_params);
754
755 // Optional white space
756 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
757 {
758 index = translate_token(tokens, index, macro_params);
759 }
760
761 // End of list?
762 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
763 (tokens[index].type() == TOKEN_TYPE::NewLine))
764 {
765 return index;
766 }
767
768 // We expect a comma here
769 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
770 (tokens[index].str() != ","))
771 {
772 throw "Unexpected end of list";
773 }
774
775 // Skip comma and optional white-space
776 index++;
777 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
778 {
779 index++;
780 }
781 }
782
783 throw "Failed to translate list";
784 return -1;
785}
786
787size_t
788translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector<string>& macro_params)
789{
790 // The next token should be white space
791 Token tok1 = get_ws(tokens[index + 1]);
792
793 printf("%s%s", translated.c_str(), tok1.str().c_str());
794 return translate_expression(tokens, index + 2, macro_params);
795}
796
797size_t
798translate_record(TokenList &tokens, size_t index, const vector<string> &macro_params)
799{
800 unsigned int bits, bitpos = 0;
801 unsigned long long oldmask = 0, mask = 0;
802
803 Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier);
804 index += 4;
805 while (index < tokens.size())
806 {
807 Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier);
808
809 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
810 {
811 index++;
812 }
813
814 if (tokens[index++].str() != ":")
815 {
816 throw "Unexpected token";
817 }
818
819 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
820 {
821 index++;
822 }
823
824 Token tok_bits = tokens[index++];
825 if ((tok_bits.type() != TOKEN_TYPE::DecNumber) &&
826 (tok_bits.type() != TOKEN_TYPE::HexNumber))
827 {
828 throw "Unexpected token";
829 }
830
831 bits = stoi(tok_bits.str(), nullptr, 0);
832
833 printf("%s = %u\n", tok_member.str().c_str(), bitpos);
834
835 oldmask = (1ULL << bitpos) - 1;
836 bitpos += bits;
837 mask = (1ULL << bitpos) - 1 - oldmask;
838 printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask);
839
840 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
841 {
842 index++;
843 }
844
845 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
846 (tokens[index].type() == TOKEN_TYPE::Comment))
847 {
848 break;
849 }
850
851 if (tokens[index].str() != ",")
852 {
853 throw "unexpected token";
854 }
855
856 index++;
857 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
858 {
859 index++;
860 }
861
862 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
863 (tokens[index].type() == TOKEN_TYPE::Comment))
864 {
865 index++;
866 }
867
868 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
869 {
870 index++;
871 }
872 }
873
874 return index;
875}
876
// Translates a construct that starts with an identifier.
// Reads the identifier at `index` and the token after it. An identifier
// followed by a colon is a label. Otherwise the token two positions ahead
// selects the translation: data definition, EQU / TEXTEQU, PROC / ENDP,
// RECORD, or - for anything unrecognised - a macro invocation that is
// handled like an instruction. A MACRO keyword at that position is rejected.
// Returns the index at which translation continues; throws a C-string
// message on error.
877size_t
879{
880 Token tok = tokens[index];
881 Token tok1 = tokens[index + 1];
882
// Label: "<name>:"; for the anonymous label "@@" the current value of
// g_label_number is printed instead of the name.
883 if (tok1.type() == TOKEN_TYPE::Colon)
884 {
885 if (tok.str() == "@@")
886 {
888 printf("%u:", g_label_number);
889 }
890 else
891 {
892 printf("%s:", tok.str().c_str());
893 }
894 return index + 2;
895 }
896
// Not a label: the token after the separator decides what this is
897 Token tok2 = tokens[index + 2];
898
899 switch (tok2.type())
900 {
901 case TOKEN_TYPE::KW_MACRO:
902 throw "Cannot have a nested macro!";
903
// Named data: emit "<name>:" plus the separator, register the name via
// add_mem_id() and translate the data list starting at the keyword.
904 case TOKEN_TYPE::KW_DB:
905 case TOKEN_TYPE::KW_DW:
906 case TOKEN_TYPE::KW_DD:
907 case TOKEN_TYPE::KW_DQ:
908 printf("%s:%s", tok.str().c_str(), tok1.str().c_str());
909 add_mem_id(tok);
910 return translate_data_def(tokens, index + 2, macro_params);
911
// "<name> EQU <expr>" becomes "#define <name> <expr>"
912 case TOKEN_TYPE::KW_EQU:
913 //printf("%s%s", tok.str().c_str(), tok1.str().c_str());
914 printf("#define %s ", tok.str().c_str());
915 return translate_expression(tokens, index + 3, macro_params);
916
// "<name> TEXTEQU <text>" becomes "#define <name> text"; the first and
// last character of the StringDef token (its delimiters) are stripped.
917 case TOKEN_TYPE::KW_TEXTEQU:
918 {
919 Token tok3 = get_ws(tokens[index + 3]);
920 Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef);
921
922 string textdef = tok4.str();
923 printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str());
924 return index + 5;
925 }
926
// "<name> PROC [FRAME]": ".func <name>" plus a label; with FRAME an
// additional unwind start directive is printed, chosen at build time by
// TARGET_amd64.
927 case TOKEN_TYPE::KW_PROC:
928 {
929 printf(".func %s\n", tok.str().c_str());
930 printf("%s:", tok.str().c_str());
931 index += 3;
932
933 if ((tokens[index].type() == TOKEN_TYPE::WhiteSpace) &&
934 (tokens[index + 1].type() == TOKEN_TYPE::KW_FRAME))
935 {
936#ifdef TARGET_amd64
937 printf("\n.seh_proc %s\n", tok.str().c_str());
938#else
939 printf("\n.cfi_startproc\n");
940#endif
941 index += 2;
942 }
943 break;
944 }
945
// "<name> ENDP": both closing directives are printed unconditionally
946 case TOKEN_TYPE::KW_ENDP:
947 {
948 printf(".seh_endproc\n.endfunc");
949 index += 3;
950 break;
951 }
952
953 case TOKEN_TYPE::KW_RECORD:
954 index = translate_record(tokens, index, macro_params);
955 break;
956
957 default:
958 // We don't know what it is, assume it's a macro and treat it like an instruction
959 index = translate_instruction(tokens, index, macro_params);
960 break;
961 }
962
963 return index;
964}
965
966size_t
967translate_construct(TokenList& tokens, size_t index, const vector<string> &macro_params)
968{
969 Token tok = tokens[index];
970
971 switch (tok.type())
972 {
973 case TOKEN_TYPE::WhiteSpace:
974 case TOKEN_TYPE::NewLine:
975 case TOKEN_TYPE::Comment:
976 return translate_token(tokens, index, macro_params);
977
978 case TOKEN_TYPE::Identifier:
979 return translate_identifier_construct(tokens, index, macro_params);
980
981 case TOKEN_TYPE::KW_ALIGN:
982 index = translate_construct_one_param(".align", tokens, index, macro_params);
983 break;
984
985 case TOKEN_TYPE::KW_allocstack:
986 index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params);
987 break;
988
989 case TOKEN_TYPE::KW_code:
990#ifdef TARGET_amd64
991 printf(".code64");
992#else
993 printf(".code");
994#endif
995 printf(" .intel_syntax noprefix");
996 index++;
997 break;
998
999 case TOKEN_TYPE::KW_const:
1000 printf(".section .rdata");
1001 index++;
1002 break;
1003
1004 case TOKEN_TYPE::KW_DB:
1005 case TOKEN_TYPE::KW_DW:
1006 case TOKEN_TYPE::KW_DD:
1007 case TOKEN_TYPE::KW_DQ:
1008 return translate_data_def(tokens, index, macro_params);
1009
1010 case TOKEN_TYPE::KW_END:
1011 printf("// END\n");
1012 return tokens.size();
1013
1014 case TOKEN_TYPE::KW_endprolog:
1015 printf(".seh_endprologue");
1016 index++;
1017 break;
1018
1019 case TOKEN_TYPE::KW_EXTERN:
1020 {
1021 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1022 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier);
1023 add_mem_id(tok2);
1024 printf("//");
1025 return complete_line(tokens, index, macro_params);
1026 }
1027
1028 case TOKEN_TYPE::KW_if:
1029 case TOKEN_TYPE::KW_ifdef:
1030 case TOKEN_TYPE::KW_ifndef:
1031 case TOKEN_TYPE::KW_else:
1032 case TOKEN_TYPE::KW_endif:
1033 // TODO: handle parameter differences between "if" and ".if" etc.
1034 printf(".");
1035 return complete_line(tokens, index, macro_params);
1036
1037 case TOKEN_TYPE::KW_include:
1038 {
1039 // The next token should be white space
1040 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1041 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename);
1042 printf("#include \"%s.h\"", tok2.str().c_str());
1043 index += 3;
1044 break;
1045 }
1046
1047 case TOKEN_TYPE::KW_PUBLIC:
1048 index = translate_construct_one_param(".global", tokens, index, macro_params);
1049 break;
1050
1051 case TOKEN_TYPE::KW_savereg:
1052 printf(".seh_savereg");
1053 return complete_line(tokens, index + 1, macro_params);
1054
1055 case TOKEN_TYPE::KW_savexmm128:
1056 printf(".seh_savexmm");
1057 return complete_line(tokens, index + 1, macro_params);
1058
1059 case TOKEN_TYPE::Instruction:
1060 index = translate_instruction(tokens, index, macro_params);
1061 break;
1062
1063 case TOKEN_TYPE::KW_ERRDEF:
1064 printf("//");
1065 return complete_line(tokens, index, macro_params);
1066
1067 default:
1068 throw "failed to translate construct";
1069 }
1070
1071 // Skip optional white-space
1072 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
1073 {
1074 index++;
1075 }
1076
1077 // Line should end here!
1078 Token end = tokens[index];
1079 if ((end.type() != TOKEN_TYPE::Comment) &&
1080 (end.type() != TOKEN_TYPE::NewLine))
1081 {
1082 throw "unexpected tokens";
1083 }
1084
1085 return index;
1086}
1087
// Translates a complete macro definition, from its name to the matching ENDM.
// Prints ".macro <name>", collects the parameter names from the header line
// and then translates the body construct by construct with those names
// passed along as macro_params. Returns the index of the token after ENDM;
// throws a C-string message when the token list ends before ENDM is found.
1088size_t
1090{
1091 vector<string> macro_params;
1092
1093 printf(".macro %s", tokens[index].str().c_str());
1094
1095 // Parse parameters
// Skip the macro name, the white space after it and the MACRO keyword
1096 index += 3;
1097 while (index < tokens.size())
1098 {
1099 Token tok = tokens[index];
1100 switch (tok.type())
1101 {
// The end of the header line is printed and ends the parameter list
1102 case TOKEN_TYPE::NewLine:
1103 case TOKEN_TYPE::Comment:
1104 index = translate_token(tokens, index, macro_params);
1105 break;
1106
// Every identifier on the header line is a parameter name
1107 case TOKEN_TYPE::Identifier:
1108 macro_params.push_back(tok.str());
1109 printf("%s", tok.str().c_str());
1110 index++;
1111 continue;
1112
1113 case TOKEN_TYPE::WhiteSpace:
1114 case TOKEN_TYPE::Operator:
1115 index = translate_token(tokens, index, macro_params);
1116 continue;
1117 }
1118
// Reached for the line end and for any token type not handled above:
// leave the parameter loop
1119 break;
1120 }
1121
1122 // Parse content
1123 while (index < tokens.size())
1124 {
1125 Token tok = tokens[index];
1126 switch (tok.type())
1127 {
1128 case TOKEN_TYPE::KW_ENDM:
1129 printf(".endm");
1130 return index + 1;
1131
1132 default:
1133 index = translate_construct(tokens, index, macro_params);
1134 }
1135 }
1136
// Ran out of tokens without seeing ENDM; the return below is never reached
1137 throw "Failed to translate macro";
1138 return -1;
1139}
1140
1141void
1143{
1144 size_t index = 0;
1145 size_t size = tokens.size();
1146 vector<string> empty_macro_params;
1147
1148 while (index < size)
1149 {
1150 // Macros are special
1151 if ((tokens[index].type() == TOKEN_TYPE::Identifier) &&
1152 (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) &&
1153 (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO))
1154 {
1155 index = translate_macro(tokens, index);
1156 }
1157 else
1158 {
1159 index = translate_construct(tokens, index, empty_macro_params);
1160 }
1161 }
1162}
1163
// Program entry point.
// Expects the path of the input file as the first command line argument,
// reads the whole file into memory, tokenizes it with g_TokenList and
// translates the tokens; the translation is written to stdout by the
// translate_* functions.
// Returns 0 on success, -1 when the argument is missing and -2 when a
// C-string exception was thrown during processing.
1164int main(int argc, char* argv[])
1165{
1166 if (argc < 2)
1167 {
1168 fprintf(stderr, "Invalid parameter!\n");
1169 return -1;
1170 }
1171
1172#if PROFILING_ENABLED
1173 time_t start_time = time(NULL);
1174#endif
1175
1176 try
1177 {
1178 // Open and read the input file
1179 string filename(argv[1]);
1182 buffer << file.rdbuf();
1183 string text = buffer.str();
1184
1185 // Create the tokenizer
1186 Tokenizer tokenizer(g_TokenList);
1187
1188 // Get a token list
1189 TokenList toklist(tokenizer, text);
1190
1191 // Now translate the tokens
1192 translate(toklist);
1193 }
// Only exceptions thrown as string literals are handled here.
// NOTE(review): exceptions of other types (e.g. from the std::sto* number
// conversions used during translation) are not caught by this handler.
1194 catch (const char* message)
1195 {
1196 fprintf(stderr, "Exception caught: '%s'\n", message);
1197 return -2;
1198 }
1199
// Optional timing report; the "+ 1" keeps total_time non-zero for the
// percentage division below.
1200#if PROFILING_ENABLED
1201 time_t total_time = time(NULL) + 1 - start_time;
1202 fprintf(stderr, "total_time = %llu\n", total_time);
1203 fprintf(stderr, "search_time = %llu\n", search_time);
1204 fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time);
1205#endif
1206
1207 return 0;
1208}
static int argc
Definition: ServiceArgs.c:12
int tolower(int c)
Definition: utclib.c:902
void add_mem_id(Token &tok)
Definition: asmpp.cpp:302
size_t translate_list(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:658
size_t translate_data_def(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:697
size_t translate_construct(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:967
size_t translate_item(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:643
vector< TOKEN_DEF > g_TokenList
Definition: asmpp.cpp:225
void translate(TokenList &tokens)
Definition: asmpp.cpp:1142
size_t translate_token(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:382
Token get_ws_or_nl(Token &&tok)
Definition: asmpp.cpp:356
#define INSTRUCTION
Definition: asmpp.cpp:115
bool is_mem_id(Token &tok)
Definition: asmpp.cpp:309
int fake_printf(const char *format,...)
Definition: asmpp.cpp:98
size_t complete_line(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:442
unsigned int g_label_number
Definition: asmpp.cpp:297
bool is_string_in_list(vector< string > list, string str)
Definition: asmpp.cpp:368
vector< string > g_identifiers
Definition: asmpp.cpp:299
time_t search_time
Definition: asmpp.cpp:26
size_t translate_expression(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:459
#define FOLLOWED_BY(x)
Definition: asmpp.cpp:107
Token get_expected_token(Token &&tok, TOKEN_TYPE type)
Definition: asmpp.cpp:335
bool iequals(const string &a, const string &b)
Definition: asmpp.cpp:323
TOKEN_TYPE
Definition: asmpp.cpp:29
@ KW_allocstack
Definition: asmpp.cpp:62
@ Reg32
Definition: asmpp.cpp:85
@ KW_FRAME
Definition: asmpp.cpp:74
@ KW_savereg
Definition: asmpp.cpp:63
@ KW_EXTERN
Definition: asmpp.cpp:52
@ KW_PROC
Definition: asmpp.cpp:73
@ StringDef
Definition: asmpp.cpp:45
@ MemRefEnd
Definition: asmpp.cpp:42
@ KW_ENDM
Definition: asmpp.cpp:54
@ BraceClose
Definition: asmpp.cpp:40
@ Comment
Definition: asmpp.cpp:34
@ KW_else
Definition: asmpp.cpp:59
@ Instruction
Definition: asmpp.cpp:82
@ KW_END
Definition: asmpp.cpp:55
@ KW_code
Definition: asmpp.cpp:49
@ KW_endprolog
Definition: asmpp.cpp:50
@ DecNumber
Definition: asmpp.cpp:35
@ Identifier
Definition: asmpp.cpp:95
@ KW_DW
Definition: asmpp.cpp:67
@ BraceOpen
Definition: asmpp.cpp:39
@ KW_RECORD
Definition: asmpp.cpp:76
@ Colon
Definition: asmpp.cpp:43
@ KW_ENDP
Definition: asmpp.cpp:75
@ Filename
Definition: asmpp.cpp:81
@ KW_EQU
Definition: asmpp.cpp:70
@ BYTE_PTR
Definition: asmpp.cpp:88
@ RegXmm
Definition: asmpp.cpp:87
@ WhiteSpace
Definition: asmpp.cpp:32
@ KW_DB
Definition: asmpp.cpp:66
@ MemRefStart
Definition: asmpp.cpp:41
@ QWORD_PTR
Definition: asmpp.cpp:91
@ KW_include
Definition: asmpp.cpp:47
@ KW_MACRO
Definition: asmpp.cpp:72
@ HexNumber
Definition: asmpp.cpp:36
@ KW_DD
Definition: asmpp.cpp:68
@ LabelName
Definition: asmpp.cpp:94
@ KW_const
Definition: asmpp.cpp:48
@ KW_TEXTEQU
Definition: asmpp.cpp:71
@ Reg8
Definition: asmpp.cpp:83
@ NewLine
Definition: asmpp.cpp:33
@ KW_ifndef
Definition: asmpp.cpp:58
@ KW_ifdef
Definition: asmpp.cpp:57
@ XMMWORD_PTR
Definition: asmpp.cpp:92
@ KW_PUBLIC
Definition: asmpp.cpp:53
@ Operator
Definition: asmpp.cpp:44
@ KW_DQ
Definition: asmpp.cpp:69
@ Eof
Definition: asmpp.cpp:31
@ KW_ALIGN
Definition: asmpp.cpp:51
@ KW_savexmm128
Definition: asmpp.cpp:64
@ DWORD_PTR
Definition: asmpp.cpp:90
@ String
Definition: asmpp.cpp:37
@ KW_endif
Definition: asmpp.cpp:60
@ KW_MASK
Definition: asmpp.cpp:78
@ KW_if
Definition: asmpp.cpp:56
@ WORD_PTR
Definition: asmpp.cpp:89
@ Invalid
Definition: asmpp.cpp:30
@ Reg16
Definition: asmpp.cpp:84
@ Reg64
Definition: asmpp.cpp:86
@ KW_ERRDEF
Definition: asmpp.cpp:79
Token get_ws(Token &&tok)
Definition: asmpp.cpp:345
size_t translate_identifier_construct(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:878
size_t translate_construct_one_param(string translated, TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:788
size_t translate_record(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:798
size_t translate_macro(TokenList &tokens, size_t index)
Definition: asmpp.cpp:1089
size_t translate_instruction(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:607
size_t translate_mem_ref(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:524
size_t translate_instruction_param(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:557
#define SEPARATOR
Definition: asmpp.cpp:113
#define index(s, c)
Definition: various.h:29
size_t size() const
Definition: tokenizer.hpp:239
int type() const
Definition: tokenizer.hpp:55
std::string str() const
Definition: tokenizer.hpp:50
_Self substr(size_type __pos=0, size_type __n=npos) const
Definition: _string.h:1022
const _CharT * c_str() const
Definition: _string.h:949
size_type size() const
Definition: _string.h:400
Definition: list.h:37
#define NULL
Definition: types.h:112
const WCHAR * text
Definition: package.c:1799
int main()
Definition: test.c:6
__kernel_time_t time_t
Definition: linux.h:252
#define printf
Definition: freeldr.h:94
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
GLdouble s
Definition: gl.h:2039
GLuint GLuint end
Definition: gl.h:1545
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: gl.h:1546
GLsizeiptr size
Definition: glext.h:5919
GLuint buffer
Definition: glext.h:5915
GLuint index
Definition: glext.h:6031
GLenum GLint GLuint mask
Definition: glext.h:6028
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
Definition: glext.h:10929
GLuint GLuint num
Definition: glext.h:9618
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLintptr offset
Definition: glext.h:5920
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define stderr
Definition: stdio.h:100
_Check_return_opt_ _CRTIMP int __cdecl fprintf(_Inout_ FILE *_File, _In_z_ _Printf_format_string_ const char *_Format,...)
#define bits
Definition: infblock.c:15
const char * filename
Definition: ioapi.h:137
__u16 time
Definition: mkdosfs.c:8
static unsigned int number
Definition: dsound.c:1479
static UINT PSTR DWORD UINT * need
Definition: parser.c:36
#define argv
Definition: mplay32.c:18
Definition: features.h:417
const WCHAR * str
Definition: fci.c:127
Definition: tftpd.h:60
void push_back(const _Tp &__x=_STLP_DEFAULT_CONSTRUCTED(_Tp))
Definition: _vector.h:379