BeanIO

BeanIO is an open source Java framework for marshalling and unmarshalling Java beans to and from a flat file, stream, or simple String object.

Features:

Support for XML, CSV, delimited and fixed length stream formats
XML, Java annotations or builder API based field mapping
Configurable record ordering and grouping rules
Object binding that spans multiple records
Record identification by one or more field values, or by record length
Common field validation rules with customizable error messages
Extensible stream parsing and type handling
Integration with Spring Batch
OSGi compatible

Source control for the BeanIO project is hosted by Google Code

A Quick Example

Let's suppose you want to read and write a CSV flat file of contact information with the following record layout:

Position	Field Name	Format
Header Record
0	Record Type	"H"
1	File Date	Date (YYYY-MM-DD)
Detail Record
0	Record Type	"D"
1	First Name	String
2	Last Name	String
3	Street	String
4	City	String
5	State	String
6	Zip	String
Trailer Record
0	Record Type	"T"
1	Detail Record Count	Integer

A sample input file could look like this:

H,2012-05-19
D,Joe,Johnson,123 Main St,Chicago,IL,60610
D,Jane,Smith,,,,
D,Albert,Jackson,456 State St,Chicago,IL,60614
T,3

And let's suppose you want to bind detail records to the following Java class.

package example;

/**
 * Bean that the detail records of the contacts file are bound to.
 * Each property corresponds to one column of a detail record.
 */
public class Contact {
    // who the contact is
    String firstName, lastName;
    // where the contact lives
    String street, city, state, zip;

    // getters and setters not shown...
}

BeanIO is configured using an XML mapping file. A 2.0 mapping file named "contacts.xml" (shown below) can be used to read and write our CSV contacts file.

<beanio xmlns="http://www.beanio.org/2012/03">

  <!-- Maps the CSV contacts file: one record layout each for the header ("H"),
       detail ("D") and trailer ("T") records. -->
  <!-- 'name' is the stream name used when creating readers and writers -->
  <!-- 'format' identifies the type of stream -->
  <stream name="contacts" format="csv">
    <!-- 'class' binds the header record to a java.util.HashMap -->
    <record name="header" class="map">
      <!-- 'rid' indicates this field is used to identify the record -->
      <field name="recordType" rid="true" literal="H" />
      <!-- 'format' can be used to provide Date and Number formats -->
      <field name="fileDate" type="date" format="yyyy-MM-dd" />
    </record>  
  
    <!-- Detail records are bound to example.Contact -->
    <record name="contact" class="example.Contact">
      <!-- 'ignore' indicates this field is not bound to a bean property -->
      <field name="recordType" rid="true" literal="D" ignore="true" />
      <!-- the remaining fields are listed in the same order as the columns of the detail record -->
      <field name="firstName" />
      <field name="lastName" />
      <field name="street" />
      <field name="city" />
      <field name="state" />
      <field name="zip" />
    </record>

    <!-- 'target' binds the trailer record to the Integer record count field -->
    <record name="trailer" target="recordCount">
      <!-- 'literal' is used to define constant values -->
      <field name="recordType" rid="true" literal="T" />
      <!-- 'type' can be declared where bean introspection is not possible -->
      <field name="recordCount" type="int" />
    </record>  
    
  </stream>
</beanio>

Using the mapping file and bean class from above, the following code will read and write our CSV contacts file. (For brevity, exception handling is omitted.)

package example;

import java.io.*;
import java.util.Map;

import org.beanio.*;

/**
 * Reads the CSV contacts file "input.csv" using the "contacts" stream mapping
 * and copies every record to "output.csv".
 */
public class ExampleMain {

    /**
     * Loads "contacts.xml", then reads each record from "input.csv", handles it
     * according to its record name and writes it to "output.csv".
     *
     * @param args not used
     * @throws Exception if the mapping cannot be loaded or a record cannot be
     *         read or written
     */
    public static void main(String[] args) throws Exception {
        // create a BeanIO StreamFactory
        StreamFactory factory = StreamFactory.newInstance();
        // load the mapping file from the working directory
        factory.load("contacts.xml");

        // create a BeanReader to read from "input.csv"
        BeanReader in = factory.createReader("contacts", new File("input.csv"));
        try {
            // create a BeanWriter to write to "output.csv"
            BeanWriter out = factory.createWriter("contacts", new File("output.csv"));
            try {
                Object record;

                // read records from "input.csv"
                while ((record = in.read()) != null) {

                    // process each record
                    if ("header".equals(in.getRecordName())) {
                        // the header record is mapped with class="map", so the cast is safe
                        @SuppressWarnings("unchecked")
                        Map<String,Object> header = (Map<String,Object>) record;
                        System.out.println(header.get("fileDate"));
                    }
                    else if ("contact".equals(in.getRecordName())) {
                        Contact contact = (Contact) record;
                        // process the contact...
                    }
                    else if ("trailer".equals(in.getRecordName())) {
                        Integer recordCount = (Integer) record;
                        System.out.println(recordCount + " contacts processed");
                    }

                    // write the record to "output.csv"
                    out.write(record);
                }

                out.flush();
            }
            finally {
                // release the output file even if reading or writing failed
                out.close();
            }
        }
        finally {
            // release the input file even if an exception was thrown
            in.close();
        }
    }
}

That's it! But of course, BeanIO supports many other cool features. For example, if we wanted to strictly validate our contacts input file, we could make the following additions to our mapping file.

<beanio xmlns="http://www.beanio.org/2012/03">

  <!-- Stricter version of the contacts mapping: adds record ordering,
       occurrence limits and field validation rules. -->
  <!-- 'strict' enforces record order and record sizes -->
  <stream name="contacts" format="csv" strict="true">
    <!-- 'occurs' enforces minimum and maximum record occurrences -->
    <record name="header" class="map" occurs="1">
      <field name="recordType" rid="true" literal="H" />
      <!-- 'required' indicates a field value is required -->
      <field name="fileDate" type="date" format="yyyy-MM-dd" required="true"/>
    </record>  
  
    <!-- '0+' allows any number of contact records, including none -->
    <record name="contact" class="example.Contact" occurs="0+">
      <field name="recordType" rid="true" literal="D" ignore="true" />
      <!-- 'maxLength' enforces a maximum String length -->
      <field name="firstName" maxLength="20" />
      <field name="lastName" required="true" maxLength="30" />
      <field name="street" maxLength="30" />
      <field name="city" maxLength="25" />
      <!-- 'minLength' enforces a minimum String length -->
      <field name="state" minLength="2" maxLength="2" />
      <!-- 'regex' enforces pattern matching -->
      <field name="zip" regex="\d{5}" />
    </record>

    <record name="trailer" target="recordCount" occurs="1">
      <field name="recordType" rid="true" literal="T" />
      <field name="recordCount" type="int" required="true" />
    </record>  
    
  </stream>
</beanio>

Prefer to annotate the Contact class instead?

package example;

@Record(minOccurs=0, maxOccurs=-1)
@Fields({
    @Field(at=0, name="recordType", rid=true, literal="D")
})
public class Contact {

    @Field(at=1, maxLength=20)
    String firstName;	
    @Field(at=2, required=true, maxLength=30)
    String lastName;
    @Field(at=3, maxLength=30)
    String street;
    @Field(at=4, maxLength=25)
    String city;
    @Field(at=5, minLength=2, maxLength=2)
    String state;
    @Field(at=6, regex="\d{5}")
    String zip;
    
    // getters and setters not shown...
}

Need to support XML? Simply change the stream format to 'xml', remove the recordType fields, and presto! You can now read and write documents like the following:

<contacts>
  <header>
    <fileDate>2012-05-19</fileDate>
  </header>
  <contact>
    <firstName>Joe</firstName>
    <lastName>Johnson</lastName>
    <street>123 Main St</street>
    <city>Chicago</city>
    <state>IL</state>
    <zip>60610</zip>
  </contact>
  <contact>
    <firstName>Jane</firstName>
    <lastName>Smith</lastName>
    <street/>
    <city/>
    <state/>
    <zip/>
  </contact>
  <trailer>
    <recordCount>2</recordCount>
  </trailer>
</contacts>

But that's not all! Check out the reference guide for more information.

2.1 Documentation

2.0 Documentation

1.2 Documentation

A Quick Example