refactors to python version, initial attempts at rust rewrite

This commit is contained in:
Mira Kristipati 2024-08-19 16:12:35 -04:00
parent 4e2019cee3
commit dadccc1cf8
4 changed files with 262 additions and 38 deletions

140
ise/ise_logparser/Cargo.lock generated Normal file
View file

@ -0,0 +1,140 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "const_format"
version = "0.2.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "ise_logparser"
version = "0.1.0"
dependencies = [
"const_format",
"lazy_static",
"nom",
"nom-regex",
"regex",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "nom-regex"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e5c7731c4c1370b61604ed52a2475e861aac9e08dec9f23903d4ddfdc91c18"
dependencies = [
"nom",
"regex",
]
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-xid"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"

View file

@ -0,0 +1,11 @@
[package]
name = "ise_logparser"
version = "0.1.0"
edition = "2021"
[dependencies]
const_format = "0.2.32"
lazy_static = "1.5.0"
nom = "7.1.3"
nom-regex = "0.2.0"
regex = "1.10.6"

View file

@ -15,6 +15,7 @@ import pstats
import parsy as ps
from colorama import Back, Fore, Style
from parsy import Parser, generate, peek, regex, seq, string
from functools import reduce
ic.configureOutput(includeContext=True)
@ -167,13 +168,11 @@ def block_parser(text: str) -> Parser:
return regex(rf"^.*\n^.*{text}.*$\n.*", re.MULTILINE)
timestamp: Parser = (
regex(
r"^\d{4}-\d{2}-\d{2}.\d{2}:\d{2}:\d{2}(\+\d{2}:\d{2})?(,\d{3})?", re.MULTILINE
)
<< string(":").optional()
)
debug_level: Parser = regex(r"[A-Za-z0-9_%-]+") << string(":").optional()
timestamp: Parser = regex(
r"^\d{4}-\d{2}-\d{2}.\d{2}:\d{2}:\d{2}(\+\d{2}:\d{2})?(,\d{3})?", re.MULTILINE
).skip(string(":").optional())
debug_level: Parser = regex(r"[A-Za-z0-9_%-]+").skip(string(":").optional())
start_block = block_parser("EASYPY JOB START")
@ -183,33 +182,32 @@ summary_block = block_parser("Task Result Summary")
details_block = block_parser("Task Result Details")
message = ps.any_char.until(ps.peek(timestamp) | ps.eof).concat()
message = ps.any_char.until(ps.alt(ps.peek(timestamp), ps.eof)).concat()
@generate
def test_output_line():
ts = yield timestamp << SPACE
level = yield debug_level << SPACE
src = yield (string("[") >> regex(r"[A-Za-z_0-9]+") << string("] ")).optional()
ts = yield timestamp.skip(SPACE)
level = yield debug_level.skip(SPACE)
src = yield (
string("[").then(regex(r"[A-Za-z_0-9]+")).skip(string("] ")).optional()
)
msg = yield message.map(lambda x: x.rstrip())
print_test_line(ts, get_debug_level_const(level), src, msg)
return ps.success('')
# return {"ts": ts, "level": level, "src": src, "msg": msg}
return ps.success("")
@generate
def test_details():
indent = yield (
NEWLINE.optional()
<< string("|").optional()
>> (SPACE.at_least(1))
<< (ps.char_from("|`") + string("--"))
<< ps.whitespace
.skip(string("|").optional())
.skip(SPACE.at_least(1))
.then(ps.seq(ps.char_from("|`"), string("--").skip(ps.whitespace)).concat())
)
test_name = yield (ps.any_char.until(ps.whitespace).concat() << ps.whitespace)
test_results = yield (ps.letter.many().concat() << NEWLINE)
test_name = yield ps.any_char.until(ps.whitespace).skip(ps.whitespace).concat()
test_results = yield ps.letter.many().skip(NEWLINE).concat()
print(f" {'' * (len(indent)//2)}{colorize_status(test_results)}\t{test_name}")
return {"test_results": test_results, "test_name": test_name}
@ -217,11 +215,13 @@ def test_details():
@generate
def subtask_details():
subtask_name = (
yield (string("|--") | string("`--")) + ps.whitespace
>> ps.any_char.until(ps.whitespace).concat()
<< ps.whitespace
yield seq(ps.alt(string("|--"), string("`--")), ps.whitespace)
.then(ps.any_char.until(ps.whitespace))
.skip(ps.whitespace)
.concat()
)
subtask_result = yield (ps.letter.many().concat() << NEWLINE)
subtask_result = yield ps.letter.many().skip(NEWLINE).concat()
print(f"{colorize_status(subtask_result)}\t{subtask_name}")
tests = yield test_details.many()
return {
@ -234,41 +234,43 @@ def subtask_details():
@generate
def task_details():
task_number = (
yield ps.any_char.until(string(":")).concat() << string(":") << ps.whitespace
yield ps.any_char.until(string(":"))
.skip(string(":"))
.skip(ps.whitespace)
.concat()
)
task_name = yield ps.any_char.until(ps.whitespace).concat() << ps.whitespace
task_name = yield ps.any_char.until(ps.whitespace).concat().skip(ps.whitespace)
print(f"{task_number}\t {Style.BRIGHT + task_name + Style.RESET_ALL}")
subtasks = yield (subtask_details.many())
subtasks = yield subtask_details.many()
return {"task_number": task_number, "task_name": task_name, "subtasks": subtasks}
@generate
# NOTE: Uncomment the print statements if you actually care what's in these sections
def sections():
header = yield (
ps.any_char.until(peek(timestamp)).concat() # TODO: output this
+ test_output_line.until(start_block).concat()
<< start_block
<< NEWLINE
)
header = yield seq(
ps.any_char.until(peek(timestamp)), # TODO: output this
test_output_line.until(start_block).skip(start_block).skip(NEWLINE),
).combine(list.__add__).concat()
ic(elapsed())
test_output = yield test_output_line.until(end_block) << end_block << NEWLINE
test_output = yield test_output_line.until(end_block).skip(end_block).skip(NEWLINE)
ic(elapsed())
# exit(1)
post_test = yield (ps.any_char.until(report_block).concat() << report_block)
post_test = yield ps.any_char.until(report_block).skip(report_block).concat()
ic(elapsed())
# print(post_test)
report = yield (ps.any_char.until(summary_block).concat() << summary_block)
report = yield ps.any_char.until(summary_block).skip(summary_block).concat()
ic(elapsed())
# print(report)
summary = yield (ps.any_char.until(details_block).concat() << details_block)
summary = yield ps.any_char.until(details_block).skip(details_block).concat()
ic(elapsed())
# print(summary)
details = yield task_details.many()
ic(elapsed())
etc = yield (ps.any_char.until(ps.eof).concat())
etc = yield ps.any_char.until(ps.eof).concat()
ic(elapsed())
return {
"header": header,
@ -307,6 +309,8 @@ def main():
print_test_line = print_test_line.with_query(query)
sections.parse(log)
if __name__ == "__main__":
# import io
# pr = cProfile.Profile()

View file

@ -0,0 +1,69 @@
use std::{cell::OnceCell, sync::OnceLock};
use lazy_static::lazy_static;
use nom::{
bytes::complete::take_while1, character::complete::anychar, error::ErrorKind, multi::many_till,
IResult,
};
use nom_regex::str::re_match;
use regex::Regex;
/// Parsed pieces of a console log.
///
/// Only the header is modelled so far; further sections are expected to be
/// added as the parser grows.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Sections {
    /// Text that precedes the first timestamped log line.
    header: String,
}
// Compiled patterns shared by the parser functions below. Each cell starts
// empty and must be filled with `set` before the matching parser is called;
// the parsers `unwrap` the cell and therefore panic on an empty one.

/// Pattern for the timestamp that opens a log line; initialised in `main`.
static TIMESTAMP_REGEX: OnceLock<Regex> = OnceLock::new();
/// Pattern for the "EASYPY JOB START" banner block; initialised in `main`.
static START_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
/// Pattern for the end-of-job block.
/// NOTE(review): nothing visible in this file ever calls `set` on this cell,
/// so `end_block` would panic if invoked — confirm where it is meant to be
/// initialised.
static END_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
fn timestamp<'a>(input: &'a str) -> IResult<&'a str, &'a str> {
re_match::<nom::error::Error<&str>>(TIMESTAMP_REGEX.get().unwrap().clone())(input)
}
fn debug_level(input: &str) -> IResult<&str, &str> {
take_while1(|c: char| c.is_alphabetic() || ['_', '-', '%'].contains(&c))(input)
}
fn start_block(input: &str) -> IResult<&str, &str> {
re_match::<nom::error::Error<&str>>(START_BLOCK_REGEX.get().unwrap().clone())(input)
}
fn end_block(input: &str) -> IResult<&str, &str> {
re_match::<nom::error::Error<&str>>(END_BLOCK_REGEX.get().unwrap().clone())(input)
}
fn test_output_line(input: &str) -> IResult<&str, &str> {
let (input, timestamp) = timestamp(input)?;
let (input, _) = nom::character::complete::char(' ')(input)?;
let (input, level) = debug_level(input)?;
let (input, _) = nom::character::complete::char(' ')(input)?;
let (input, _) = nom::character::complete::line_ending(input)?;
let (input, msg) = message(input)?;
IResult::Ok((input, ""))
}
fn header(input: &str) -> IResult<&str, String> {
// any_char.until(timestamp).concat()
let (_, (b, rest)) = many_till(anychar, timestamp)(input)?;
let joined = b.iter().collect::<String>();
// test_output_line.until(start_block)
let (_, (c, rest)) = many_till(test_output_line, start_block)(rest)?;
print!("{}", rest);
IResult::Ok((input, joined))
}
/// Parses a whole console log into its [`Sections`].
///
/// Work in progress: only the header parser is run (its error is propagated
/// to the caller), after which the function hits `todo!()` and panics.
fn sections(log: &str) -> IResult<&str, Sections> {
    // Neither the remaining input nor the header text is used yet.
    header(log)?;
    todo!()
}
fn main() {
TIMESTAMP_REGEX
.set(Regex::new(r"^\d{4}-\d{2}-\d{2}.\d{2}:\d{2}:\d{2}(\+\d{2}:\d{2})?(,\d{3})?").unwrap())
.unwrap();
START_BLOCK_REGEX
.set(Regex::new(r"^.*\n^.*EASYPY JOB START.*$\n.*").unwrap())
.unwrap();
let file = include_str!("../consoleText");
header(file);
}