SPSS – Multivariate weighting

Task:

  • We want SPSS data to be weighted according to several independent criteria.
  • e.g. gender, age, region, size of residence, economic activity etc.

Polemic:

  • There is no built tool for this way of weighting directly in SPSS.
  • We aren’t able to use or build our own external weighting application. This kind of application usually needs a special data file with parameters for weighting and after weighting it creates another data file with weights to be imported back to SPSS. User comfort there is not too high.
  • This fact annoyed us and we were looking for an easier solutions.

Solution using SPSS macro:


DEFINE !mac_weight (w_vars = !ENCLOSE('[',']') / w_var = !ENCLOSE('[',']') / N = !ENCLOSE('[',']')  / w_control = !ENCLOSE('[',']') )

  weight off.
  compute !w_var = 1.
  variable label !w_var 'Weight'.
  aggregate
    /outfile=* mode=addvariables overwritevars=yes
    /@total 'Universum' = sum(!w_var).
  compute @real = 1.
  compute @q_real = 100 * @real / @total.
  execute.

  !IF (!N~=!NULL) !THEN
     compute @total = !N.
  !IFEND

  !LET !w_num_param = !NULL
  !DO !w_pom !IN (!w_vars)
    !LET !w_num_param = !CONCAT(!w_num_param,"1")
  !DOEND

  !LET !hlp_vars = !w_vars
  !DO !w_hlp = 1 !TO !UNQUOTE(!LENGTH(!w_num_param)) !BY 3
    !LET !hlp_var    = !HEAD(!hlp_vars)
    !LET !hlp_vars   = !TAIL(!hlp_vars)
    !LET !hlp_code   = !HEAD(!hlp_vars)
    !LET !hlp_vars   = !TAIL(!hlp_vars)
    !LET !hlp_weight = !HEAD(!hlp_vars)
    !LET !hlp_vars   = !TAIL(!hlp_vars)
    recode !hlp_var (!hlp_code = !hlp_weight) into !CONCAT("@w",!hlp_var).
  !DOEND
  execute.

  !LET !hlp_vars = !w_vars
  !LET !w_hlp_crit  = !NULL
  !DO !w_hlp = 1 !TO !UNQUOTE(!LENGTH(!w_num_param)) !BY 3
    !LET !hlp_var  = !HEAD(!hlp_vars)
    !LET !hlp_vars = !TAIL(!TAIL(!TAIL(!hlp_vars)))
    !IF (!w_hlp_crit ~= !hlp_var) !THEN
      !LET !w_hlp_crit = !hlp_var
      compute !CONCAT("@ww",!w_hlp_crit) = !CONCAT("@w",!w_hlp_crit).
      compute !CONCAT("@www",!w_hlp_crit) = !CONCAT("@w",!w_hlp_crit).
      sort cases by !w_hlp_crit.
      if (!w_hlp_crit= lag(!w_hlp_crit)) !CONCAT("@ww",!w_hlp_crit) = lag(!CONCAT("@ww",!w_hlp_crit)).
      if (!w_hlp_crit= lag(!w_hlp_crit)) !CONCAT("@www",!w_hlp_crit) = lag(!CONCAT("@www",!w_hlp_crit)).
      if (!w_hlp_crit<>lag(!w_hlp_crit)) !CONCAT("@ww",!w_hlp_crit) = sum(lag(!CONCAT("@ww",!w_hlp_crit)),!CONCAT("@w",!w_hlp_crit)).
      if (!w_hlp_crit<>lag(!w_hlp_crit)) !CONCAT("@www",!w_hlp_crit) = sum(lag(!CONCAT("@www",!w_hlp_crit)),!CONCAT("@w",!w_hlp_crit)).
      aggregate
        /outfile=* mode=addvariables overwritevars=yes
        /!CONCAT("@ww",!w_hlp_crit) = max(!CONCAT("@ww",!w_hlp_crit))
        /!CONCAT("@www",!w_hlp_crit) = max(!CONCAT("@www",!w_hlp_crit)).
      compute !CONCAT("@ww",!w_hlp_crit) = @total * !CONCAT("@w",!w_hlp_crit) / !CONCAT("@ww",!w_hlp_crit).
      compute !CONCAT("@www",!w_hlp_crit) = !CONCAT("@w",!w_hlp_crit) / !CONCAT("@www",!w_hlp_crit).
      aggregate
        /outfile=* mode=addvariables overwritevars=yes
        /break = !w_hlp_crit
        /@number_of_resp = sum(!w_var).
      compute !CONCAT("@q_ww",!w_hlp_crit) = !CONCAT("@ww",!w_hlp_crit) / @number_of_resp.
      compute !CONCAT("@q_www",!w_hlp_crit) = 100 * !CONCAT("@www",!w_hlp_crit) / @number_of_resp.
    !IFEND
  !DOEND
  execute.

  !LET !w_hlp_crit  = !NULL
  !DO !w_hlp1 = 1 !TO !UNQUOTE(!LENGTH(!w_num_param)) !BY 3
    !LET !hlp_vars = !w_vars
    !DO !w_hlp2 = 1 !TO !UNQUOTE(!LENGTH(!w_num_param)) !BY 3
      !LET !hlp_var  = !HEAD(!hlp_vars)
      !LET !hlp_vars = !TAIL(!TAIL(!TAIL(!hlp_vars)))
      !IF (!w_hlp_crit ~= !hlp_var) !THEN
        !LET !w_hlp_crit = !hlp_var
        aggregate /outfile=* mode=addvariables overwritevars=yes
          /break = !w_hlp_crit
          /@number_of_resp = sum(!w_var).
        compute !w_var = !w_var * (!CONCAT("@ww",!w_hlp_crit) / @number_of_resp).
        execute.
      !IFEND
    !DOEND
  !DOEND

  !IF (!UPCASE(!w_control)="YES") !THEN
    !IF (!N~=!NULL) !THEN
       compute @total = !N.
    !IFEND
    !LET !hlp_vars = !w_vars
    !LET !w_hlp_crit  = !NULL
    !DO !w_hlp = 1 !TO !UNQUOTE(!LENGTH(!w_num_param)) !BY 3
      !LET !hlp_var  = !HEAD(!hlp_vars)
      !LET !hlp_vars = !TAIL(!TAIL(!TAIL(!hlp_vars)))
      !IF (!w_hlp_crit ~= !hlp_var) !THEN
        !LET !w_hlp_crit = !hlp_var
        compute !CONCAT("@q_",!w_var) = 100 * !w_var / @total.
        ctables
          /vlabels variables=@q_real !CONCAT("@q_www",!w_hlp_crit) !CONCAT("@q_",!w_var) @real !CONCAT("@q_ww",!w_hlp_crit) !w_var display=none
          /vlabels variables=!w_hlp_crit display=label
          /table 
               @q_real [s][sum '% (Real)' F40.2] +
               !CONCAT("@q_www",!w_hlp_crit) [s][sum '% (Quote)' F40.2] +
               !CONCAT("@q_",!w_var) [s][sum '% (Weighted)' F40.2] +
               @real [s][sum 'Number (Real)' F40.2] +
               !CONCAT("@q_ww",!w_hlp_crit) [s][sum 'Number (Quote)' F40.2] +
               !w_var [s][sum 'Number (Weighted)' F40.2] + 
               !w_var [s][minimum 'Min Weight' F40.2] + 
               !w_var [s][maximum 'Max Weight' F40.2]
               by !w_hlp_crit [c]
          /titles title = !QUOTE(!CONCAT("Checking of Weights - ",!w_hlp_crit))
          /slabels position=row
          /categories variables=!w_hlp_crit order=a key=value empty=include total=yes position=before.
      !IFEND
    !DOEND
  !ELSE
    descriptives weight /statistics = min max.
  !IFEND

  !LET !w_hlp_crit  = !NULL
  !LET !hlp_vars = !w_vars
  !DO !w_hlp = 1 !TO !UNQUOTE(!LENGTH(!w_num_param)) !BY 3
    !LET !hlp_var  = !HEAD(!hlp_vars)
    !LET !hlp_vars = !TAIL(!TAIL(!TAIL(!hlp_vars)))
    !IF (!w_hlp_crit ~= !hlp_var) !THEN
      !LET !w_hlp_crit = !hlp_var
      delete variables !CONCAT("@w",!w_hlp_crit) !CONCAT("@ww",!w_hlp_crit) !CONCAT("@www",!w_hlp_crit) !CONCAT("@q_ww",!w_hlp_crit) !CONCAT("@q_www",!w_hlp_crit).
    !IFEND
  !DOEND
  delete variables @total @real @q_real @number_of_resp !CONCAT("@q_",!w_var).

  weight by !w_var.

!ENDDEFINE.

/* example of macro call.
!mac_weight
 N [1000]
 w_control [yes]
 w_var [weight]
 w_vars [
  q1 1 0.5
  q1 2 0.5
  q2 1 0.25
  q2 2 0.4
  q2 3 0.35
 ]
.

18 thoughts on “SPSS – Multivariate weighting

  1. Sorry, I dont want to spam your board, but I found a solution for my problem and the macro works perfectly now! Thanks for the great syntax!

  2. Hi Sedlo,

    the macro is of great help, thank you!
    When I insert my variables to define the mac, I always get the following error: “On the DEFINE command, the characters specified to the !CHAREDND and !ENCLOSE subcommands must be enclosed in single quotes.”
    Can you tell me what I am doing wrong?
    Here’s my syntax command:

    DEFINE !mac_weight (w_vars = !ENCLOSE(‘[‘job 1 12 q1 2 88 position 1 33 q2 2 67’]’) / w_var = !ENCLOSE(‘[‘weight’]’) / N = !ENCLOSE(‘[‘1787′]’) )
    (job and position are the names of the variables)

      • Hi Daniela,
        you mustn’t change the macro definition.
        There is Example how tu use it in the end of souce code aftre the note “/* example of macro call.”

        At the beginning you have to run macro code between commands “DEFINE” and “!ENDDEFINE.” without any changes.
        And then you have to call macro !mac_weight with right parameters.

  3. Hi, is there anyway to constrain the minimum or maximum value of the weights used, say < 2. Thank your in advance.

    Dirk

  4. Hello, Is it possible to use the weighting macro free of charge? We would like to use it for our tasks in the company. Thanks for the response.

  5. Hi, I recently came across your macro for multi-variate weighting and gave it a try. I am testing the macro on multiple datasets and have come across an issue where the macro doesn’t seem to be working to weight the groups to the percentages I indicated in the file. IE.. Instead of weighting to 112 the weight only weights to 88 even though the original sample size (unweighted count) is N=112. Please advise.

  6. Hi Sedlo,

    your macro looks very good and inspiring. Based on your work I have re-build new script to avoid 2 kinds of lacks:
    1) working with target number instead of percentage can shorten the syntax up to 1 half
    2) moving the calculations from main dataset to aggregated one leads to better efficiency (because of smaller ammount of agg, sort, exe calls)

    While working with small data (lets say 150cases), no difference is to be observed. For the bigger ones, it is advantage. My test data has 132.000 cases, initial running time was 92 seconds. For the updated version only 20 seconds.

    Here is the re-builded version. In case of any comment, let me know.
    https://drive.google.com/file/d/0B2VxIms7j1-ZLWg1NzF1WmVPeUE/view?usp=sharing

  7. I am new to multilevel weighting and I apologize if I’m reporting a false issue.
    In my SPSS (version 21) I get the following warning:
    Text: @q_Weight
    Command: delete variables
    An undefined variable name, or a scratch or system variable was specified in a variable list which accepts only standard variables. Check spelling and verify the existence of this variable.
    Execution of this command stops.

    Actually temporary variables are not deleted, but I hope the rest is fine, that is weight variable is computed correctly.
    Thanks

    • Hello,
      @q_Weight is helping variable used for control outputs. For sure you can send me your modified syntax together with datafile. Use contact form.

  8. Note that there is a free extension command, SPSSINC RAKE, available from the SPSS Community website (http://www.ibm.com/developerworks/spssdevcentral) that rakes weights to control totals in up to 10 dimensions and provides a measure of sample efficiency.

    It includes a dialog box interface and standard SPSS syntax. This extension requires the Python plugin and the Advanced Statistics option. As of Statistics version 22, this procedure is installed as part of the Python Essentials.

    • Dear Jon,

      we tried to use your advice with RAKE command and it works. Unfortunately, need of the Advanced Statistics module is a big disadvantage of your solution. 🙁 Vladimír’s solution works in SPSS Basic with no requirement of other modules.

Leave a Comment