From b812dd932aaec17dc81dbd80d902291edfa5ca93 Mon Sep 17 00:00:00 2001 From: Michael Weiss Date: Sun, 2 Aug 2020 15:21:29 +0200 Subject: scripts/mailing-list-member-retirement: Add an initial PoC Can read fsi.mbox (228 MiB) but lacks features (e.g. limiting the parsing to messages of the past year) and it would be nice to indicate which messages couldn't be read. But it basically works :) --- scripts/mailing-list-member-retirement/.gitignore | 2 + scripts/mailing-list-member-retirement/go.mod | 8 +++ scripts/mailing-list-member-retirement/go.sum | 16 +++++ scripts/mailing-list-member-retirement/main.go | 80 +++++++++++++++++++++++ scripts/mailing-list-member-retirement/test.mbox | 17 +++++ 5 files changed, 123 insertions(+) create mode 100644 scripts/mailing-list-member-retirement/.gitignore create mode 100644 scripts/mailing-list-member-retirement/go.mod create mode 100644 scripts/mailing-list-member-retirement/go.sum create mode 100644 scripts/mailing-list-member-retirement/main.go create mode 100644 scripts/mailing-list-member-retirement/test.mbox (limited to 'scripts') diff --git a/scripts/mailing-list-member-retirement/.gitignore b/scripts/mailing-list-member-retirement/.gitignore new file mode 100644 index 0000000..7adb4be --- /dev/null +++ b/scripts/mailing-list-member-retirement/.gitignore @@ -0,0 +1,2 @@ +# Compiled binary: +mailing-list-active-members diff --git a/scripts/mailing-list-member-retirement/go.mod b/scripts/mailing-list-member-retirement/go.mod new file mode 100644 index 0000000..800eac0 --- /dev/null +++ b/scripts/mailing-list-member-retirement/go.mod @@ -0,0 +1,8 @@ +module github.com/fsi-tue/mailing-list-active-members + +go 1.14 + +require ( + github.com/emersion/go-mbox v1.0.0 + github.com/emersion/go-message v0.12.0 +) diff --git a/scripts/mailing-list-member-retirement/go.sum b/scripts/mailing-list-member-retirement/go.sum new file mode 100644 index 0000000..59050d5 --- /dev/null +++ b/scripts/mailing-list-member-retirement/go.sum @@ -0,0 +1,16 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emersion/go-mbox v1.0.0 h1:HN6aKbyqmgIfK9fS/gen+NRr2wXLSxZXWfdAIAnzQPc= +github.com/emersion/go-mbox v1.0.0/go.mod h1:Yp9IVuuOYLEuMv4yjgDHvhb5mHOcYH6x92Oas3QqEZI= +github.com/emersion/go-message v0.12.0 h1:mZnv35eZ6lB6EftTQBgYXspOH0FQdhpFhSUhA9i6/Zg= +github.com/emersion/go-message v0.12.0/go.mod h1:C4jnca5HOTo4bGN9YdqNQM9sITuT3Y0K6bSUw9RklvY= +github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe h1:40SWqY0zE3qCi6ZrtTf5OUdNm5lDnGnjRSq9GgmeTrg= +github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe/go.mod h1:aqO8z8wPrjkscevZJFVE1wXJrLpC5LtJG7fqLOsPb2U= +github.com/martinlindhe/base36 v1.0.0 h1:eYsumTah144C0A8P1T/AVSUk5ZoLnhfYFM3OGQxB52A= +github.com/martinlindhe/base36 v1.0.0/go.mod h1:+AtEs8xrBpCeYgSLoY/aJ6Wf37jtBuR0s35750M27+8= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/scripts/mailing-list-member-retirement/main.go b/scripts/mailing-list-member-retirement/main.go new file mode 100644 index 0000000..7ce7867 --- /dev/null +++ b/scripts/mailing-list-member-retirement/main.go @@ -0,0 +1,80 @@ +package main + +import ( + "bufio" + "flag" + "fmt" + "io" + "log" + "os" + "strings" + "github.com/emersion/go-mbox" + "github.com/emersion/go-message/mail" + _ "github.com/emersion/go-message/charset" +) + +// Prints the email addresses (one per line) of all senders by parsing an MBOX. + +func main() { + mboxNamePtr := flag.String("mbox", "test.mbox", "MBOX to read") + flag.Parse() + PrintAllMboxSenders(*mboxNamePtr) +} + +func PrintAllMboxSenders(fileName string) { + f, err := os.Open(fileName) + if err != nil { + log.Fatal(err) + } + defer func() { + if err = f.Close(); err != nil { + log.Fatal(err) + } + }() + fr := bufio.NewReader(f) + mr := mbox.NewReader(fr) + for { + r, err := mr.NextMessage() + if err == io.EOF { + break + } else if err != nil { + log.Fatal(err) + } + PrintMessageSender(r) + } +} + +func PrintMessageSender(r io.Reader) { + mr, err := mail.CreateReader(r) + if err != nil { + if err.Error() == "charset \"cp-850\": ianaindex: invalid encoding name" { + fmt.Fprintf(os.Stderr, "Ignored a mail due to an invalid charset\n") + return // TODO: Print message ID + } + log.Fatal(err) + } + + addr, err := mr.Header.AddressList("From") + if err != nil { + if strings.Contains(err.Error(), "invalid utf-8 in quoted-string") { + addr := strings.TrimPrefix(err.Error(), "mail: missing word in phrase: mail: invalid utf-8 in quoted-string: ") + fmt.Fprintf(os.Stderr, "Ignored due to invalid UTF-8 encoding: %v\n", addr) + return + } else if err.Error() == "mail: missing @ in addr-spec" { + fmt.Fprintf(os.Stderr, "Ignored a sender due to missing @\n") + return // TODO: Print invalid address + } else if err.Error() == "mail: no angle-addr" { + fmt.Fprintf(os.Stderr, "Ignored a mail due to no angle-address\n") + return // TODO: Print message ID + } + log.Fatal(err) + } else if len(addr) != 1 { + if len(addr) == 0 { + fmt.Fprintf(os.Stderr, "Ignored a mail due to a missing sender\n") + return // TODO: Print message ID + } + fmt.Fprintf(os.Stderr, "A mail has an unexpected number of senders: %v\n", len(addr)) + return // TODO: Print message ID + } + fmt.Println(addr[0].Address) +} diff --git a/scripts/mailing-list-member-retirement/test.mbox b/scripts/mailing-list-member-retirement/test.mbox new file mode 100644 index 0000000..d5fd851 --- /dev/null +++ b/scripts/mailing-list-member-retirement/test.mbox @@ -0,0 +1,17 @@ +From herp.derp@example.com Thu Jan 1 00:00:01 2015 +From: herp.derp@example.com (Herp Derp) +Date: Thu, 01 Jan 2015 00:00:01 +0100 +Subject: Test + +This is a simple test. + +CU. + +From derp.herp@example.com Thu Jan 1 00:00:01 2015 +From: derp.herp@example.com (Derp Herp) +Date: Thu, 02 Jan 2015 00:00:01 +0100 +Subject: Another test + +This is another simple test. + +Bye. -- cgit v1.2.3