Skip to content

Schema for Data Generation

The schema to configure the input file for data generation.

Schema:

### Schema for YAML configuration

### ANCHOR: Define stages to run
stages:
  type: list
  required: true
  allowed:
    # build the atomic structures
    - make_structure
    # optimize the structure
    - optimize_structure
    # explore the sampling space
    - sampling_space
    # run the DFT single-point/AIMD simulation
    - run_dft
    # collect the data
    - collect_data


### ANCHOR: Define atomic structure
structure:
  type: dict
  required: true
  schema:
    # List of paths to the EXTXYZ files to be used as the initial structure.
    # If provided, the structure will be read from the file, and the other
    # structure parameters will be ignored.
    from_extxyz:
      type: list

    # Build the structure from scratch.
    # See [Schema for building structure](https://thangckt.github.io/alff_doc/schema/manual_schema_ase_build/)
    from_scratch:
      type: dict

    # Normalize the cell to an upper/lower triangular matrix, to be able to
    # use the NPT ensemble. Default: None.
    make_triangular_form:
      type: string
      allowed:
        - upper
        - lower


### ANCHOR: Define sampling space:
# Can run `scaled structures` in ranges of temperatures; or sample
# `original structures` in ranges of temperatures and stresses.
sampling_space:
  type: dict
  schema:
    ### sampling scaled structures
    scale:
      type: dict
      schema:
        ## range of displacements in x-direction.
        ## E.g., {'start': 0.0, 'stop': 0.1, 'step': 0.01}
        range_x:
          type: dict
          schema:
            # start value of the scan range
            start:
              type: float
            # stop value of the scan range
            stop:
              type: float
            # step value of the scan range
            step:
              type: float

        ## range of displacements in y-direction.
        ## E.g., {'start': 0.0, 'stop': 0.1, 'step': 0.01}
        range_y:
          type: dict
          schema:
            # start value of the scan range
            start:
              type: float
            # stop value of the scan range
            stop:
              type: float
            # step value of the scan range
            step:
              type: float

        ## range of displacements in z-direction.
        ## E.g., {'start': -0.01, 'stop': 0.01, 'step': 0.01}
        range_z:
          type: dict
          schema:
            # start value of the scan range
            start:
              type: float
            # stop value of the scan range
            stop:
              type: float
            # step value of the scan range
            step:
              type: float

        ## list of temperatures in K. E.g., [300, 600]
        temps:
          type: list

    ### sampling in temperature and stress
    temp_press:
      type: dict
      schema:
        # list of temperatures in K
        temps:
          type: list
        # list of stresses in GPa
        pressures:
          type: list


### ANCHOR: DFT calculators
dft:
  type: dict
  schema:
    # Accepts all keywords for ['ase.calc'](https://thangckt.github.io/alff_doc/schema/config_ase/)
    calc_args:
      type: dict
      required: true
      schema:
        # GPAW calculator parameters
        gpaw:
          type: dict
        # DFT-D3 calculator for Van der Waals correction
        dftd3:
          type: dict

    # Run DFT to optimize the structure.
    # See keywords in ['ase.optimize'](https://thangckt.github.io/alff_doc/schema/config_ase/)
    optimize:
      type: dict

    # Run AIMD simulation.
    # See keywords in ['ase.md'](https://thangckt.github.io/alff_doc/schema/config_ase/)
    md:
      type: dict

Example config 1:

### Example configuration file for generating DFT data with ALFF

# Stages to run, in the order listed.
stages:
  # build the atomic structures
  - make_structure
  # optimize the structure
  - optimize_structure
  # explore the sampling space
  - sampling_space
  # run the DFT single-point/AIMD simulation
  - run_dft
  # collect the data
  - collect_data


structure:  # atomic structure information
  # Alternative (disabled): read the initial structure from EXTXYZ files instead of building it.
  # from_extxyz: ["path/to/extxyz_file"]  # list of paths to the EXTXYZ files to be used as the initial structure. If provided, the structure will be read from the file, and the other structure parameters will be ignored.

  from_scratch:
    # Alternative (disabled): a bulk bcc Mo crystal.
    # structure_type: "bulk"    # bulk, molecule, surface,
    # chem_formula: "Mo"        # chemical formula/element. e.g., "H2O", "Mg2O2", "Mg",
    # supercell: [ 1, 1, 1 ]    # size of the supercell
    # pbc: [1, 1, 1]

    # NOTE(review): this disabled example uses the key `ase_arg`, while the active example below uses `ase_build_arg` — confirm which key name is current.
    # ase_arg:                  # ASE kwargs for building the structure. https://wiki.fysik.dtu.dk/ase/ase/build/build.html#
    #   crystalstructure: "bcc" # choices: sc,fcc,bcc,tetragonal,bct,hcp,rhombohedral,orthorhombic,mcl,diamond,zincblende,rocksalt,cesiumchloride,fluorite,wurtzite.
    #   a: 3.98  # lattice constant
    #   # cubic: True

    # Active example: an MX2 structure of WS2 in a 2x2x1 supercell.
    structure_type: "mx2"
    chem_formula: "WS2"
    supercell: [ 2, 2, 1 ]
    pbc: [1, 1, 1]
    # add_vacuum: [0, 0, 25]
    ase_build_arg:
      kind: '2H'                # '2H' or '1T' for MX2 structure
      a: 3.18
      thickness: 3.19
      vacuum: 1.7

  # make_triangular_form: 'lower'     # normalize the cell to an upper/lower triangular matrix, to be able to use the NPT ensemble


# Sample `scaled structures` in ranges of temperatures; or sample
# `original structures` in ranges of temperatures and stresses.
sampling_space:
  scale:
    # range of displacements in x-direction.
    range_x:
      start: 0.9
      stop: 1.1
      step: 0.05
    # range_y: {'start': 0.9, 'stop': 1.1, 'step': 0.05}
    # list of temperatures in K
    temps:
      - 300
      - 600
      - 900


# DFT settings: calculator arguments, structure optimization, and AIMD run.
dft:
  calc_args:
    gpaw:                       ### accept GPAW parameters
      mode:
        name: 'pw'              # use the plane-wave method
        ecut: 500               # energy cutoff in eV
      xc: "PBE"                 # exchange-correlation functional
      kpts:
        density: 6
        gamma: False            # if `kpts` is not set, then only the Gamma point is used
      parallel:
        # sl_auto: True         # enable ScaLAPACK parallelization
        use_elpa: True          # enable Elpa eigensolver
        # augment_grids: True   # use all cores for XC/Poisson solver

    dftd3:                      ### DFT-D3 method for Van der Waals correction
      damping: "d3zero"         # use DFT-D3 damping. Default is "d3zero" (zero-damping). Choices: "d3bj","d3zero","d3bjm","d3zerom","d3op".

  optimize:                     # run DFT to optimize the structure
    fmax: 0.05                  # force convergence criteria
    # Strain components to optimize (1 = optimize, 0 = keep fixed).
    # NOTE(review): the previous comment said "-> fixed zz", but the third entry is 1.
    # Presumably the order is (xx, yy, zz, yz, xz, xy); confirm the order and whether zz should be 0.
    mask: [1, 1, 1, 0, 0, 0]

  md:                           # run AIMD simulation
    dt: 1.0                     # time step in fs
    num_frames: 10              # number of frames to be collected for each MD run. Then nsteps = num_frames * traj_freq
    traj_freq: 1                # dump the frames every `traj_freq` steps
    equil_steps: 0              # number of equilibration steps before production run. Default is 0 steps

    thermostat: 'langevin'      # Choices: 'langevin', 'nose_hoover', 'nose_hoover_chain'
    # tdamp: 100                # damping timesteps for Nose_Hoover thermostat. Default is 50.
    langevin_friction: 0.002    # friction coefficient for Langevin thermostat. Default is 0.002 fs^-1