refactors to python version, initial attempts at rust rewrite

This commit is contained in:
Mira Kristipati 2024-08-19 16:12:35 -04:00
parent 4e2019cee3
commit dadccc1cf8
4 changed files with 262 additions and 38 deletions

140
ise/ise_logparser/Cargo.lock generated Normal file
View file

@ -0,0 +1,140 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "const_format"
version = "0.2.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "ise_logparser"
version = "0.1.0"
dependencies = [
"const_format",
"lazy_static",
"nom",
"nom-regex",
"regex",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "nom-regex"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e5c7731c4c1370b61604ed52a2475e861aac9e08dec9f23903d4ddfdc91c18"
dependencies = [
"nom",
"regex",
]
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-xid"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"

View file

@ -0,0 +1,11 @@
[package]
name = "ise_logparser"
version = "0.1.0"
edition = "2021"
[dependencies]
const_format = "0.2.32"
lazy_static = "1.5.0"
nom = "7.1.3"
nom-regex = "0.2.0"
regex = "1.10.6"

View file

@ -15,6 +15,7 @@ import pstats
import parsy as ps import parsy as ps
from colorama import Back, Fore, Style from colorama import Back, Fore, Style
from parsy import Parser, generate, peek, regex, seq, string from parsy import Parser, generate, peek, regex, seq, string
from functools import reduce
ic.configureOutput(includeContext=True) ic.configureOutput(includeContext=True)
@ -167,13 +168,11 @@ def block_parser(text: str) -> Parser:
return regex(rf"^.*\n^.*{text}.*$\n.*", re.MULTILINE) return regex(rf"^.*\n^.*{text}.*$\n.*", re.MULTILINE)
timestamp: Parser = ( timestamp: Parser = regex(
regex( r"^\d{4}-\d{2}-\d{2}.\d{2}:\d{2}:\d{2}(\+\d{2}:\d{2})?(,\d{3})?", re.MULTILINE
r"^\d{4}-\d{2}-\d{2}.\d{2}:\d{2}:\d{2}(\+\d{2}:\d{2})?(,\d{3})?", re.MULTILINE ).skip(string(":").optional())
)
<< string(":").optional() debug_level: Parser = regex(r"[A-Za-z0-9_%-]+").skip(string(":").optional())
)
debug_level: Parser = regex(r"[A-Za-z0-9_%-]+") << string(":").optional()
start_block = block_parser("EASYPY JOB START") start_block = block_parser("EASYPY JOB START")
@ -183,33 +182,32 @@ summary_block = block_parser("Task Result Summary")
details_block = block_parser("Task Result Details") details_block = block_parser("Task Result Details")
message = ps.any_char.until(ps.peek(timestamp) | ps.eof).concat() message = ps.any_char.until(ps.alt(ps.peek(timestamp), ps.eof)).concat()
@generate @generate
def test_output_line(): def test_output_line():
ts = yield timestamp << SPACE ts = yield timestamp.skip(SPACE)
level = yield debug_level << SPACE level = yield debug_level.skip(SPACE)
src = yield (string("[") >> regex(r"[A-Za-z_0-9]+") << string("] ")).optional() src = yield (
string("[").then(regex(r"[A-Za-z_0-9]+")).skip(string("] ")).optional()
)
msg = yield message.map(lambda x: x.rstrip()) msg = yield message.map(lambda x: x.rstrip())
print_test_line(ts, get_debug_level_const(level), src, msg) print_test_line(ts, get_debug_level_const(level), src, msg)
return ps.success('') return ps.success("")
# return {"ts": ts, "level": level, "src": src, "msg": msg}
@generate @generate
def test_details(): def test_details():
indent = yield ( indent = yield (
NEWLINE.optional() NEWLINE.optional()
<< string("|").optional() .skip(string("|").optional())
>> (SPACE.at_least(1)) .skip(SPACE.at_least(1))
<< (ps.char_from("|`") + string("--")) .then(ps.seq(ps.char_from("|`"), string("--").skip(ps.whitespace)).concat())
<< ps.whitespace
) )
test_name = yield (ps.any_char.until(ps.whitespace).concat() << ps.whitespace) test_name = yield ps.any_char.until(ps.whitespace).skip(ps.whitespace).concat()
test_results = yield (ps.letter.many().concat() << NEWLINE) test_results = yield ps.letter.many().skip(NEWLINE).concat()
print(f" {'' * (len(indent)//2)}{colorize_status(test_results)}\t{test_name}") print(f" {'' * (len(indent)//2)}{colorize_status(test_results)}\t{test_name}")
return {"test_results": test_results, "test_name": test_name} return {"test_results": test_results, "test_name": test_name}
@ -217,11 +215,13 @@ def test_details():
@generate @generate
def subtask_details(): def subtask_details():
subtask_name = ( subtask_name = (
yield (string("|--") | string("`--")) + ps.whitespace yield seq(ps.alt(string("|--"), string("`--")), ps.whitespace)
>> ps.any_char.until(ps.whitespace).concat() .then(ps.any_char.until(ps.whitespace))
<< ps.whitespace .skip(ps.whitespace)
.concat()
) )
subtask_result = yield (ps.letter.many().concat() << NEWLINE)
subtask_result = yield ps.letter.many().skip(NEWLINE).concat()
print(f"{colorize_status(subtask_result)}\t{subtask_name}") print(f"{colorize_status(subtask_result)}\t{subtask_name}")
tests = yield test_details.many() tests = yield test_details.many()
return { return {
@ -234,41 +234,43 @@ def subtask_details():
@generate @generate
def task_details(): def task_details():
task_number = ( task_number = (
yield ps.any_char.until(string(":")).concat() << string(":") << ps.whitespace yield ps.any_char.until(string(":"))
.skip(string(":"))
.skip(ps.whitespace)
.concat()
) )
task_name = yield ps.any_char.until(ps.whitespace).concat() << ps.whitespace
task_name = yield ps.any_char.until(ps.whitespace).concat().skip(ps.whitespace)
print(f"{task_number}\t {Style.BRIGHT + task_name + Style.RESET_ALL}") print(f"{task_number}\t {Style.BRIGHT + task_name + Style.RESET_ALL}")
subtasks = yield (subtask_details.many()) subtasks = yield subtask_details.many()
return {"task_number": task_number, "task_name": task_name, "subtasks": subtasks} return {"task_number": task_number, "task_name": task_name, "subtasks": subtasks}
@generate @generate
# NOTE: Uncomment the print statements if you actually care what's in these sections # NOTE: Uncomment the print statements if you actually care what's in these sections
def sections(): def sections():
header = yield ( header = yield seq(
ps.any_char.until(peek(timestamp)).concat() # TODO: output this ps.any_char.until(peek(timestamp)), # TODO: output this
+ test_output_line.until(start_block).concat() test_output_line.until(start_block).skip(start_block).skip(NEWLINE),
<< start_block ).combine(list.__add__).concat()
<< NEWLINE
)
ic(elapsed()) ic(elapsed())
test_output = yield test_output_line.until(end_block) << end_block << NEWLINE test_output = yield test_output_line.until(end_block).skip(end_block).skip(NEWLINE)
ic(elapsed()) ic(elapsed())
# exit(1) # exit(1)
post_test = yield (ps.any_char.until(report_block).concat() << report_block) post_test = yield ps.any_char.until(report_block).skip(report_block).concat()
ic(elapsed()) ic(elapsed())
# print(post_test) # print(post_test)
report = yield (ps.any_char.until(summary_block).concat() << summary_block) report = yield ps.any_char.until(summary_block).skip(summary_block).concat()
ic(elapsed()) ic(elapsed())
# print(report) # print(report)
summary = yield (ps.any_char.until(details_block).concat() << details_block) summary = yield ps.any_char.until(details_block).skip(details_block).concat()
ic(elapsed()) ic(elapsed())
# print(summary) # print(summary)
details = yield task_details.many() details = yield task_details.many()
ic(elapsed()) ic(elapsed())
etc = yield (ps.any_char.until(ps.eof).concat()) etc = yield ps.any_char.until(ps.eof).concat()
ic(elapsed()) ic(elapsed())
return { return {
"header": header, "header": header,
@ -307,6 +309,8 @@ def main():
print_test_line = print_test_line.with_query(query) print_test_line = print_test_line.with_query(query)
sections.parse(log) sections.parse(log)
if __name__ == "__main__": if __name__ == "__main__":
# import io # import io
# pr = cProfile.Profile() # pr = cProfile.Profile()

View file

@ -0,0 +1,69 @@
use std::{cell::OnceCell, sync::OnceLock};
use lazy_static::lazy_static;
use nom::{
bytes::complete::take_while1, character::complete::anychar, error::ErrorKind, multi::many_till,
IResult,
};
use nom_regex::str::re_match;
use regex::Regex;
/// The parsed pieces of a console log.
///
/// Only the header is modelled so far; the remaining sections of the log are
/// not yet represented.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct Sections {
    /// Free-form text that precedes the first timestamped log line.
    header: String,
}
/// Compiled pattern for a log-line timestamp; set once in `main` and read by `timestamp`.
static TIMESTAMP_REGEX: OnceLock<Regex> = OnceLock::new();
/// Compiled pattern for the "EASYPY JOB START" banner; set once in `main` and read by `start_block`.
static START_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
/// Compiled pattern for the end-of-job banner, read by `end_block`.
// NOTE(review): nothing in this file calls `END_BLOCK_REGEX.set(..)` yet, so `end_block`
// will panic if it is invoked — confirm the intended pattern and initialize it in `main`.
static END_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
fn timestamp<'a>(input: &'a str) -> IResult<&'a str, &'a str> {
re_match::<nom::error::Error<&str>>(TIMESTAMP_REGEX.get().unwrap().clone())(input)
}
fn debug_level(input: &str) -> IResult<&str, &str> {
take_while1(|c: char| c.is_alphabetic() || ['_', '-', '%'].contains(&c))(input)
}
fn start_block(input: &str) -> IResult<&str, &str> {
re_match::<nom::error::Error<&str>>(START_BLOCK_REGEX.get().unwrap().clone())(input)
}
fn end_block(input: &str) -> IResult<&str, &str> {
re_match::<nom::error::Error<&str>>(END_BLOCK_REGEX.get().unwrap().clone())(input)
}
fn test_output_line(input: &str) -> IResult<&str, &str> {
let (input, timestamp) = timestamp(input)?;
let (input, _) = nom::character::complete::char(' ')(input)?;
let (input, level) = debug_level(input)?;
let (input, _) = nom::character::complete::char(' ')(input)?;
let (input, _) = nom::character::complete::line_ending(input)?;
let (input, msg) = message(input)?;
IResult::Ok((input, ""))
}
fn header(input: &str) -> IResult<&str, String> {
// any_char.until(timestamp).concat()
let (_, (b, rest)) = many_till(anychar, timestamp)(input)?;
let joined = b.iter().collect::<String>();
// test_output_line.until(start_block)
let (_, (c, rest)) = many_till(test_output_line, start_block)(rest)?;
print!("{}", rest);
IResult::Ok((input, joined))
}
/// Parses a whole console log into its `Sections`.
///
/// Work in progress: the header is parsed and any header error is returned to
/// the caller, but everything after a successful header parse is still
/// unimplemented and panics via `todo!()`.
fn sections(log: &str) -> IResult<&str, Sections> {
    match header(log) {
        Ok((_remaining, _preamble)) => todo!(),
        Err(failure) => Err(failure),
    }
}
/// Entry point: compiles the shared regexes, then parses the header of the
/// console log embedded at build time from `../consoleText`.
///
/// A header parse failure is reported on stderr and the process exits with
/// status 1 instead of being silently discarded.
fn main() {
    TIMESTAMP_REGEX
        .set(
            Regex::new(r"^\d{4}-\d{2}-\d{2}.\d{2}:\d{2}:\d{2}(\+\d{2}:\d{2})?(,\d{3})?")
                .expect("timestamp pattern is a valid regex"),
        )
        .expect("TIMESTAMP_REGEX is initialized exactly once");
    // `\A` pins the banner to the current parse position; the inner `^` and `$`
    // need multi-line mode, otherwise they can never match after a `\n`.
    START_BLOCK_REGEX
        .set(
            Regex::new(r"\A(?m:.*\n^.*EASYPY JOB START.*$\n.*)")
                .expect("start-block pattern is a valid regex"),
        )
        .expect("START_BLOCK_REGEX is initialized exactly once");
    // NOTE(review): END_BLOCK_REGEX is not initialized here; set it before
    // `end_block` is used.

    let file = include_str!("../consoleText");
    if let Err(err) = header(file) {
        // Report only the error kind: the full error carries the remaining
        // input, which can be most of the log.
        eprintln!("failed to parse log header: {}", err.map(|e| e.code));
        std::process::exit(1);
    }
}