0% found this document useful (0 votes)
19 views6 pages

C++ Tokenizer and Flex File Guide

Compiler Construction
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
19 views6 pages

C++ Tokenizer and Flex File Guide

Compiler Construction
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Lab 3 & 4

Report

Submitted By :
Mehak Aftab
2021-CS-92

28 September 2024

UNIVERSITY OF ENGINEERING AND TECHNOLOGY

COMPUTER SCIENCE DEPARTMENT


Contents
1 Flex File Defined with more Concrete Rules 4

2 My Tokenizer Code 5

2
List of Figures
1 Tokenizing Code . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6

3
1 Flex File Defined with more Concrete Rules
%{
/*
 * Flex specification for a small C/C++-like token classifier.
 *
 * Every rule prints the matched lexeme (yytext) followed by a category
 * label. Flex always takes the longest match and, when two rules match the
 * same length, the rule listed first wins. The keyword rules therefore have
 * to stay above the generic identifier rule, and the comment rules beat the
 * "/" operator simply because they match more characters.
 */
#include <stdio.h>
%}

%%

"int"|"float"|"double"|"char"|"void"|"return"    { printf("%s is a data type or keyword\n", yytext); }
"if"|"else"|"while"|"for"|"do"|"switch"|"case"   { printf("%s is a control keyword\n", yytext); }
"main"                                           { printf("%s is a function name\n", yytext); }
"#"[ \t]*"include"                               { /* match the '#' itself so it is not reported as an unknown character */
                                                   printf("%s is a preprocessor directive\n", yytext); }
[0-9]+                                           { printf("%s is a number\n", yytext); }
[a-zA-Z_][a-zA-Z0-9_]*                           { printf("%s is an identifier\n", yytext); }
"=="|"!="|"<="|">="|"&&"|"||"|"="|"<"|">"|"!"    { printf("%s is a relational or logical operator\n", yytext); }
"+"|"-"|"*"|"/"|"%"                              { printf("%s is an arithmetic operator\n", yytext); }
";"                                              { printf("%s is a semicolon\n", yytext); }
","                                              { printf("%s is a comma\n", yytext); }
"{"|"}"|"("|")"                                  { printf("%s is a brace or parenthesis\n", yytext); }
\"[^\"]*\"                                       { printf("%s is a string literal\n", yytext); }
\/\/.*                                           { printf("%s is a single-line comment\n", yytext); }
\/\*[^*]*\*+([^/*][^*]*\*+)*\/                   { printf("%s is a multi-line comment\n", yytext); }
[ \t\n]+                                         { /* ignore whitespace (spaces, tabs, newlines) */ }
.                                                { printf("Unknown character: %s\n", yytext); }

%%

/* Entry point: prints a prompt, then runs the scanner over standard input
 * until end of file. */
int main() {
    printf("Enter your C++ code:\n");
    yylex(); /* start scanning; returns once the input is exhausted */
    return 0;
}

/* Called by the scanner at end of input. Returning 1 reports that there is
 * no further input to switch to, so scanning stops. */
int yywrap() {
    return 1;
}

2 My Tokenizer Code
#include <iostream>
#include <vector>
#include <cctype>
#include <string>

using namespace std;

/**
 * Splits source text into a flat list of lexemes.
 *
 * Tokenization rules:
 *  - whitespace separates tokens and is never emitted;
 *  - a maximal run of letters, digits and underscores forms one token;
 *  - any other character is emitted as a single-character token
 *    (so "3.14" yields "3", ".", "14" and "==" yields "=", "=").
 *
 * @param code  Source text to tokenize; may be empty.
 * @return      The tokens in the order they appear in @p code.
 */
std::vector<std::string> tokenize(const std::string &code) {
    // The <cctype> classifiers have undefined behaviour for negative values
    // other than EOF, so a plain char must go through unsigned char first.
    const auto isWordChar = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    };

    std::vector<std::string> tokens;
    const std::size_t length = code.length();

    std::size_t start = 0;
    while (start < length) {
        const char current = code[start];

        if (std::isspace(static_cast<unsigned char>(current)) != 0) {
            ++start; // whitespace only separates tokens
            continue;
        }

        // A token is at least one character long; word tokens extend over
        // the whole run of word characters, bounded by the end of the input.
        std::size_t end = start + 1;
        if (isWordChar(current)) {
            while (end < length && isWordChar(code[end])) {
                ++end;
            }
        }

        tokens.emplace_back(code, start, end - start);
        start = end;
    }

    return tokens;
}

int main() {
string code = R"(
int main() {
int a = 5;
float b = 3.14;
if (a > b) {
a = a + 1;
}
return 0;
}
)";

vector<string> tokens = tokenize(code); // Call tokenize function

for (const string &token : tokens) { // Loop through each token in tokens
cout << token << endl; // Print token
}

return 0;
}

Figure 1: Tokenizing Code

You might also like