Lab 3 & 4
Report
Submitted By :
Mehak Aftab
2021-CS-92
28 September 2024
UNIVERSITY OF ENGINEERING AND TECHNOLOGY
COMPUTER SCIENCE DEPARTMENT
Contents
1 Flex File Defined with more Concrete Rules 4
2 My Tokenizer Code 5
2
List of Figures
1 Tokenizing Code . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6
3
1 Flex File Defined with more Concrete Rules
%{
/*
 * Flex specification for a small C/C++ subset.
 * Each rule prints the matched lexeme (yytext) together with its category.
 * When two rules match the same length of input, flex picks the one listed
 * first, so the keyword rules must stay above the identifier rule.
 */
#include <stdio.h>
%}

%%
    /* Type names and `return`. */
"int"|"float"|"double"|"char"|"void"|"return"   { printf("%s is a data type or keyword\n", yytext); }

    /* Control-flow keywords. */
"if"|"else"|"while"|"for"|"do"|"switch"|"case"  { printf("%s is a control keyword\n", yytext); }

"main"                                          { printf("%s is a function name\n", yytext); }

    /* The '#' is part of the match, so it is no longer reported as an
       unknown character and a bare `include` is treated as an identifier. */
"#include"                                      { printf("%s is a preprocessor directive\n", yytext); }

[0-9]+                                          { printf("%s is a number\n", yytext); }

[a-zA-Z_][a-zA-Z0-9_]*                          { printf("%s is an identifier\n", yytext); }

    /* Two-character operators win over their one-character prefixes because
       flex always prefers the longest match. */
"=="|"!="|"<="|">="|"&&"|"||"|"="|"<"|">"|"!"   { printf("%s is a relational or logical operator\n", yytext); }

"+"|"-"|"*"|"/"|"%"                             { printf("%s is an arithmetic operator\n", yytext); }

";"                                             { printf("%s is a semicolon\n", yytext); }

","                                             { printf("%s is a comma\n", yytext); }

"{"|"}"|"("|")"                                 { printf("%s is a brace or parenthesis\n", yytext); }

    /* Double-quoted text without embedded quotes (escapes are not handled). */
\"[^\"]*\"                                      { printf("%s is a string literal\n", yytext); }

    /* `//` up to, but not including, the end of the line. */
\/\/.*                                          { printf("%s is a single-line comment\n", yytext); }

    /* Block comment: opener, then any text that does not close the comment,
       then the closer. */
\/\*[^*]*\*+([^/*][^*]*\*+)*\/                  { printf("%s is a multi-line comment\n", yytext); }

    /* Whitespace (spaces, tabs, newlines) is discarded. */
[ \t\n]+                                        { /* ignore */ }

    /* Fallback for any single character no earlier rule accepted. */
.                                               { printf("Unknown character: %s\n", yytext); }
%%
4
/*
 * Driver for the generated scanner: prints a prompt, then runs yylex() once.
 * The rule actions do all the printing while yylex() consumes the input.
 */
int main() {
    fputs("Enter your C++ code:\n", stdout);
    yylex(); /* scan the input; each matched rule reports its token */
    return 0;
}
/*
 * End-of-input hook invoked by the flex scanner.
 * Always answers 1, which per the flex manual means "no further input",
 * so scanning terminates at the first end-of-file.
 */
int yywrap() {
    enum { NO_MORE_INPUT = 1 };
    return NO_MORE_INPUT;
}
2 My Tokenizer Code
#include <iostream>
#include <vector>
#include <cctype>
#include <string>
using namespace std;
/**
 * Splits C-like source text into a flat list of lexemes.
 *
 * Scanning rules:
 *  - whitespace separates tokens and is never emitted;
 *  - a maximal run of letters, digits and underscores forms one token
 *    (identifiers, keywords and integer literals alike);
 *  - every other character becomes its own one-character token, so "3.14"
 *    yields "3", ".", "14" and "==" yields two "=" tokens.
 *
 * @param code Source text to scan; may be empty.
 * @return The tokens in order of appearance.
 */
std::vector<std::string> tokenize(const std::string &code) {
    // The <cctype> classifiers are undefined for negative arguments, so each
    // char is converted through unsigned char before being classified.
    const auto is_space = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };
    const auto is_word = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    };

    std::vector<std::string> tokens;
    const std::string::size_type length = code.size();
    std::string::size_type i = 0;

    while (i < length) {
        if (is_space(code[i])) {
            ++i;
            continue;
        }

        const std::string::size_type start = i;
        if (is_word(code[i])) {
            // Consume the whole word; the bounds check keeps the look-ahead
            // inside the string instead of relying on the terminator.
            while (i < length && is_word(code[i])) {
                ++i;
            }
        } else {
            ++i; // punctuation and operators are emitted one character at a time
        }
        tokens.push_back(code.substr(start, i - start));
    }
    return tokens;
}
int main() {
string code = R"(
int main() {
int a = 5;
float b = 3.14;
if (a > b) {
a = a + 1;
}
return 0;
}
)";
vector<string> tokens = tokenize(code); // Call tokenize function
for (const string &token : tokens) { // Loop through each token in tokens
cout << token << endl; // Print token
}
return 0;
}
Figure 1: Tokenizing Code